STATISTICS-2: Migrate "o.a.c.math4.distribution" from Commons Math.
"IntegerDistribution" was renamed "DiscreteDistribution". "RealDistribution" was renamed "ContinuousDistribution". All exceptions are instances of "DistributionException" (package-private). Solver code (used by method "inverseCumulativeProbability") is a private static inner class in "AbstractContinuousDistribution". Tolerances are hard-coded. [Constructors that specified a tolerance were removed.] Calls to "FastMath" were replaced by calls to JDK "Math". This has led to two unit tests failing in "GammaDistributionTest" for which the tolerance had to be slightly increased. [The main source indicates which calls to "Math" methods are responsible for the failures at the original tolerance.] Project: http://git-wip-us.apache.org/repos/asf/commons-statistics/repo Commit: http://git-wip-us.apache.org/repos/asf/commons-statistics/commit/9c794a15 Tree: http://git-wip-us.apache.org/repos/asf/commons-statistics/tree/9c794a15 Diff: http://git-wip-us.apache.org/repos/asf/commons-statistics/diff/9c794a15 Branch: refs/heads/master Commit: 9c794a15f75aafbe9d2ab4b62b7e43e1c32e7501 Parents: 585178f Author: Gilles Sadowski <[email protected]> Authored: Sun Jan 21 14:41:03 2018 +0100 Committer: Gilles Sadowski <[email protected]> Committed: Sun Jan 21 14:41:03 2018 +0100 ---------------------------------------------------------------------- commons-statistics-distribution/LICENSE.txt | 275 ++ commons-statistics-distribution/NOTICE.txt | 5 + commons-statistics-distribution/pom.xml | 86 + .../AbstractContinuousDistribution.java | 453 +++ .../AbstractDiscreteDistribution.java | 220 ++ .../distribution/BetaDistribution.java | 202 ++ .../distribution/BinomialDistribution.java | 170 + .../distribution/CauchyDistribution.java | 166 + .../distribution/ChiSquaredDistribution.java | 119 + .../ConstantContinuousDistribution.java | 116 + .../distribution/ContinuousDistribution.java | 176 + .../distribution/DiscreteDistribution.java | 163 + .../distribution/DistributionException.java | 61 + .../distribution/ExponentialDistribution.java | 197 ++ .../statistics/distribution/FDistribution.java | 209 ++ .../distribution/GammaDistribution.java | 354 ++ .../distribution/GeometricDistribution.java | 160 + .../distribution/GumbelDistribution.java | 128 + .../HypergeometricDistribution.java | 293 ++ .../distribution/LaplaceDistribution.java | 132 + .../distribution/LevyDistribution.java | 161 + .../distribution/LogNormalDistribution.java | 266 ++ .../distribution/LogisticDistribution.java | 128 + .../distribution/NakagamiDistribution.java | 117 + .../distribution/NormalDistribution.java | 216 ++ .../distribution/ParetoDistribution.java | 225 ++ .../distribution/PascalDistribution.java | 211 ++ .../distribution/PoissonDistribution.java | 238 ++ .../distribution/SaddlePointExpansion.java | 191 + .../statistics/distribution/TDistribution.java | 180 + .../distribution/TriangularDistribution.java | 222 ++ .../UniformContinuousDistribution.java | 168 + .../UniformDiscreteDistribution.java | 159 + .../distribution/WeibullDistribution.java | 220 ++ .../distribution/ZipfDistribution.java | 236 ++ .../statistics/distribution/package-info.java | 20 + .../AbstractContinuousDistributionTest.java | 209 ++ .../AbstractDiscreteDistributionTest.java | 130 + .../distribution/BetaDistributionTest.java | 381 ++ .../distribution/BinomialDistributionTest.java | 173 + .../distribution/CauchyDistributionTest.java | 111 + .../ChiSquaredDistributionTest.java | 136 + .../ConstantContinuousDistributionTest.java | 92 + .../ContinuousDistributionAbstractTest.java | 456 +++ .../DiscreteDistributionAbstractTest.java | 411 +++ .../ExponentialDistributionTest.java | 132 + .../distribution/FDistributionTest.java | 150 + .../distribution/GammaDistributionTest.java | 354 ++ .../distribution/GeometricDistributionTest.java | 167 + .../distribution/GumbelDistributionTest.java | 70 + .../HypergeometricDistributionTest.java | 335 ++ .../distribution/LaplaceDistributionTest.java | 70 + .../distribution/LevyDistributionTest.java | 81 + .../distribution/LogNormalDistributionTest.java | 250 ++ .../distribution/LogisticsDistributionTest.java | 70 + .../distribution/NakagamiDistributionTest.java | 70 + .../distribution/NormalDistributionTest.java | 213 ++ .../distribution/ParetoDistributionTest.java | 201 ++ .../distribution/PascalDistributionTest.java | 132 + .../distribution/PoissonDistributionTest.java | 244 ++ .../distribution/TDistributionTest.java | 169 + .../statistics/distribution/TestUtils.java | 281 ++ .../TriangularDistributionTest.java | 192 + .../UniformContinuousDistributionTest.java | 123 + .../UniformDiscreteDistributionTest.java | 139 + .../distribution/WeibullDistributionTest.java | 118 + .../distribution/ZipfDistributionTest.java | 166 + .../distribution/gamma-distribution-shape-1.csv | 3215 +++++++++++++++++ .../gamma-distribution-shape-10.csv | 415 +++ .../gamma-distribution-shape-100.csv | 408 +++ .../gamma-distribution-shape-1000.csv | 3325 ++++++++++++++++++ .../gamma-distribution-shape-142.csv | 775 ++++ .../distribution/gamma-distribution-shape-8.csv | 3215 +++++++++++++++++ .../distribution/gamma-distribution.mac | 73 + .../statistics/distribution/testData.txt | 1000 ++++++ pom.xml | 65 +- 76 files changed, 24949 insertions(+), 11 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-statistics/blob/9c794a15/commons-statistics-distribution/LICENSE.txt ---------------------------------------------------------------------- diff --git a/commons-statistics-distribution/LICENSE.txt b/commons-statistics-distribution/LICENSE.txt new file mode 100644 index 0000000..de777e4 --- /dev/null +++ b/commons-statistics-distribution/LICENSE.txt @@ -0,0 +1,275 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +================================================================================ + +Class "org.apache.commons.rng.internal.source64.MersenneTwister64" contains +Java code partly ported from the reference implementation in C. +That source file contained the following notice: + + Copyright (C) 2004, Makoto Matsumoto and Takuji Nishimura, + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. The names of its contributors may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +================================================================================ + +Class "org.apache.commons.rng.internal.source32.MersenneTwister" contains +Java code partly ported from the reference implementation in C. +That source file contained the following notice: + + Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. The names of its contributors may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +================================================================================ http://git-wip-us.apache.org/repos/asf/commons-statistics/blob/9c794a15/commons-statistics-distribution/NOTICE.txt ---------------------------------------------------------------------- diff --git a/commons-statistics-distribution/NOTICE.txt b/commons-statistics-distribution/NOTICE.txt new file mode 100644 index 0000000..a67070b --- /dev/null +++ b/commons-statistics-distribution/NOTICE.txt @@ -0,0 +1,5 @@ +Apache Commons Statistics +Copyright 2018-2018 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). http://git-wip-us.apache.org/repos/asf/commons-statistics/blob/9c794a15/commons-statistics-distribution/pom.xml ---------------------------------------------------------------------- diff --git a/commons-statistics-distribution/pom.xml b/commons-statistics-distribution/pom.xml new file mode 100644 index 0000000..ed5d669 --- /dev/null +++ b/commons-statistics-distribution/pom.xml @@ -0,0 +1,86 @@ +<?xml version="1.0"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> +<project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd" + xmlns="http://maven.apache.org/POM/4.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> + <modelVersion>4.0.0</modelVersion> + + <parent> + <groupId>org.apache.commons</groupId> + <artifactId>commons-statistics-parent</artifactId> + <version>0.1-SNAPSHOT</version> + </parent> + + <groupId>org.apache.commons</groupId> + <artifactId>commons-statistics-distribution</artifactId> + <version>0.1-SNAPSHOT</version> + <name>Apache Commons Statistics Distribution</name> + + <description>Statistical distributions.</description> + + <properties> + <!-- This value must reflect the current name of the base package. --> + <commons.osgi.symbolicName>org.apache.commons.statistics.distribution</commons.osgi.symbolicName> + <!-- OSGi --> + <commons.osgi.export>org.apache.commons.statistics.distribution</commons.osgi.export> + <!-- Workaround to avoid duplicating config files. --> + <statistics.parent.dir>${basedir}/..</statistics.parent.dir> + </properties> + + <dependencies> + + <dependency> + <groupId>org.apache.commons</groupId> + <artifactId>commons-rng-client-api</artifactId> + </dependency> + + <dependency> + <groupId>org.apache.commons</groupId> + <artifactId>commons-rng-sampling</artifactId> + </dependency> + + <dependency> + <groupId>org.apache.commons</groupId> + <artifactId>commons-numbers-core</artifactId> + </dependency> + + <dependency> + <groupId>org.apache.commons</groupId> + <artifactId>commons-numbers-combinatorics</artifactId> + </dependency> + + <dependency> + <groupId>org.apache.commons</groupId> + <artifactId>commons-numbers-gamma</artifactId> + </dependency> + + <dependency> + <groupId>org.apache.commons</groupId> + <artifactId>commons-rng-simple</artifactId> + <scope>test</scope> + </dependency> + + <dependency> + <groupId>org.apache.commons</groupId> + <artifactId>commons-math3</artifactId> + <scope>test</scope> + </dependency> + + </dependencies> + +</project> http://git-wip-us.apache.org/repos/asf/commons-statistics/blob/9c794a15/commons-statistics-distribution/src/main/java/commons/statistics/distribution/AbstractContinuousDistribution.java ---------------------------------------------------------------------- diff --git a/commons-statistics-distribution/src/main/java/commons/statistics/distribution/AbstractContinuousDistribution.java b/commons-statistics-distribution/src/main/java/commons/statistics/distribution/AbstractContinuousDistribution.java new file mode 100644 index 0000000..1d6b254 --- /dev/null +++ b/commons-statistics-distribution/src/main/java/commons/statistics/distribution/AbstractContinuousDistribution.java @@ -0,0 +1,453 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.statistics.distribution; + +import java.util.function.DoubleUnaryOperator; +import org.apache.commons.numbers.core.Precision; +import org.apache.commons.rng.UniformRandomProvider; +import org.apache.commons.rng.sampling.distribution.InverseTransformContinuousSampler; +import org.apache.commons.rng.sampling.distribution.ContinuousInverseCumulativeProbabilityFunction; +import org.apache.commons.rng.sampling.distribution.ContinuousSampler; + +/** + * Base class for probability distributions on the reals. + * Default implementations are provided for some of the methods + * that do not vary from distribution to distribution. + * + * This base class provides a default factory method for creating + * a {@link ContinuousDistribution.Sampler sampler instance} that uses the + * <a href="http://en.wikipedia.org/wiki/Inverse_transform_sampling"> + * inversion method</a> for generating random samples that follow the + * distribution. + */ +public abstract class AbstractContinuousDistribution + implements ContinuousDistribution { + /** + * For a random variable {@code X} whose values are distributed according + * to this distribution, this method returns {@code P(x0 < X <= x1)}. + * + * @param x0 Lower bound (excluded). + * @param x1 Upper bound (included). + * @return the probability that a random variable with this distribution + * takes a value between {@code x0} and {@code x1}, excluding the lower + * and including the upper endpoint. + * @throws IllegalArgumentException if {@code x0 > x1}. + * + * The default implementation uses the identity + * {@code P(x0 < X <= x1) = P(X <= x1) - P(X <= x0)} + */ + @Override + public double probability(double x0, + double x1) { + if (x0 > x1) { + throw new DistributionException(DistributionException.TOO_LARGE, x0, x1); + } + return cumulativeProbability(x1) - cumulativeProbability(x0); + } + + /** + * {@inheritDoc} + * + * The default implementation returns + * <ul> + * <li>{@link #getSupportLowerBound()} for {@code p = 0},</li> + * <li>{@link #getSupportUpperBound()} for {@code p = 1}.</li> + * </ul> + */ + @Override + public double inverseCumulativeProbability(final double p) { + /* + * IMPLEMENTATION NOTES + * -------------------- + * Where applicable, use is made of the one-sided Chebyshev inequality + * to bracket the root. This inequality states that + * P(X - mu >= k * sig) <= 1 / (1 + k^2), + * mu: mean, sig: standard deviation. Equivalently + * 1 - P(X < mu + k * sig) <= 1 / (1 + k^2), + * F(mu + k * sig) >= k^2 / (1 + k^2). + * + * For k = sqrt(p / (1 - p)), we find + * F(mu + k * sig) >= p, + * and (mu + k * sig) is an upper-bound for the root. + * + * Then, introducing Y = -X, mean(Y) = -mu, sd(Y) = sig, and + * P(Y >= -mu + k * sig) <= 1 / (1 + k^2), + * P(-X >= -mu + k * sig) <= 1 / (1 + k^2), + * P(X <= mu - k * sig) <= 1 / (1 + k^2), + * F(mu - k * sig) <= 1 / (1 + k^2). + * + * For k = sqrt((1 - p) / p), we find + * F(mu - k * sig) <= p, + * and (mu - k * sig) is a lower-bound for the root. + * + * In cases where the Chebyshev inequality does not apply, geometric + * progressions 1, 2, 4, ... and -1, -2, -4, ... are used to bracket + * the root. + */ + if (p < 0 || + p > 1) { + throw new DistributionException(DistributionException.OUT_OF_RANGE, p, 0, 1); + } + + double lowerBound = getSupportLowerBound(); + if (p == 0) { + return lowerBound; + } + + double upperBound = getSupportUpperBound(); + if (p == 1) { + return upperBound; + } + + final double mu = getNumericalMean(); + final double sig = Math.sqrt(getNumericalVariance()); + final boolean chebyshevApplies; + chebyshevApplies = !(Double.isInfinite(mu) || + Double.isNaN(mu) || + Double.isInfinite(sig) || + Double.isNaN(sig)); + + if (lowerBound == Double.NEGATIVE_INFINITY) { + if (chebyshevApplies) { + lowerBound = mu - sig * Math.sqrt((1 - p) / p); + } else { + lowerBound = -1; + while (cumulativeProbability(lowerBound) >= p) { + lowerBound *= 2; + } + } + } + + if (upperBound == Double.POSITIVE_INFINITY) { + if (chebyshevApplies) { + upperBound = mu + sig * Math.sqrt(p / (1 - p)); + } else { + upperBound = 1; + while (cumulativeProbability(upperBound) < p) { + upperBound *= 2; + } + } + } + + // XXX Values copied from defaults in class + // "o.a.c.math4.analysis.solvers.BaseAbstractUnivariateSolver" + final double solverRelativeAccuracy = 1e-14; + final double solverAbsoluteAccuracy = 1e-9; + final double solverFunctionValueAccuracy = 1e-15; + + double x = new BrentSolver(solverRelativeAccuracy, + solverAbsoluteAccuracy, + solverFunctionValueAccuracy) + .solve((arg) -> cumulativeProbability(arg) - p, + lowerBound, + 0.5 * (lowerBound + upperBound), + upperBound); + + if (!isSupportConnected()) { + /* Test for plateau. */ + final double dx = solverAbsoluteAccuracy; + if (x - dx >= getSupportLowerBound()) { + double px = cumulativeProbability(x); + if (cumulativeProbability(x - dx) == px) { + upperBound = x; + while (upperBound - lowerBound > dx) { + final double midPoint = 0.5 * (lowerBound + upperBound); + if (cumulativeProbability(midPoint) < px) { + lowerBound = midPoint; + } else { + upperBound = midPoint; + } + } + return upperBound; + } + } + } + return x; + } + + /** + * {@inheritDoc} + * + * @return zero. + */ + @Override + public double probability(double x) { + return 0; + } + + /** + * {@inheritDoc} + * + * The default implementation computes the logarithm of {@code density(x)}. + */ + @Override + public double logDensity(double x) { + return Math.log(density(x)); + } + + /** + * Utility function for allocating an array and filling it with {@code n} + * samples generated by the given {@code sampler}. + * + * @param n Number of samples. + * @param sampler Sampler. + * @return an array of size {@code n}. + */ + public static double[] sample(int n, + ContinuousDistribution.Sampler sampler) { + final double[] samples = new double[n]; + for (int i = 0; i < n; i++) { + samples[i] = sampler.sample(); + } + return samples; + } + + /**{@inheritDoc} */ + @Override + public ContinuousDistribution.Sampler createSampler(final UniformRandomProvider rng) { + return new ContinuousDistribution.Sampler() { + /** + * Inversion method distribution sampler. + */ + private final ContinuousSampler sampler = + new InverseTransformContinuousSampler(rng, createICPF()); + + /** {@inheritDoc} */ + @Override + public double sample() { + return sampler.sample(); + } + }; + } + + /** + * @return an instance for use by {@link #createSampler(UniformRandomProvider)} + */ + private ContinuousInverseCumulativeProbabilityFunction createICPF() { + return new ContinuousInverseCumulativeProbabilityFunction() { + /** {@inheritDoc} */ + @Override + public double inverseCumulativeProbability(double p) { + return AbstractContinuousDistribution.this.inverseCumulativeProbability(p); + } + }; + } + + /** + * This class implements the <a href="http://mathworld.wolfram.com/BrentsMethod.html"> + * Brent algorithm</a> for finding zeros of real univariate functions. + * The function should be continuous but not necessarily smooth. + * The {@code solve} method returns a zero {@code x} of the function {@code f} + * in the given interval {@code [a, b]} to within a tolerance + * {@code 2 eps abs(x) + t} where {@code eps} is the relative accuracy and + * {@code t} is the absolute accuracy. + * <p>The given interval must bracket the root.</p> + * <p> + * The reference implementation is given in chapter 4 of + * <blockquote> + * <b>Algorithms for Minimization Without Derivatives</b>, + * <em>Richard P. Brent</em>, + * Dover, 2002 + * </blockquote> + * + * Used by {@link #inverseCumulativeProbability(double)}. + */ + private static class BrentSolver { + /** Relative accuracy. */ + private final double relativeAccuracy; + /** Absolutee accuracy. */ + private final double absoluteAccuracy; + /** Function accuracy. */ + private final double functionValueAccuracy; + + /** + * Construct a solver. + * + * @param relativeAccuracy Relative accuracy. + * @param absoluteAccuracy Absolute accuracy. + * @param functionValueAccuracy Function value accuracy. + */ + BrentSolver(double relativeAccuracy, + double absoluteAccuracy, + double functionValueAccuracy) { + this.relativeAccuracy = relativeAccuracy; + this.absoluteAccuracy = absoluteAccuracy; + this.functionValueAccuracy = functionValueAccuracy; + } + + /** + * @param func Function to solve. + * @param min Lower bound. + * @param init Initial guess. + * @param max Upper bound. + * @return the root. + */ + double solve(DoubleUnaryOperator func, + double min, + double initial, + double max) { + if (min > max) { + throw new DistributionException(DistributionException.TOO_LARGE, min, max); + } + if (initial < min || + initial > max) { + throw new DistributionException(DistributionException.OUT_OF_RANGE, initial, min, max); + } + + // Return the initial guess if it is good enough. + double yInitial = func.applyAsDouble(initial); + if (Math.abs(yInitial) <= functionValueAccuracy) { + return initial; + } + + // Return the first endpoint if it is good enough. + double yMin = func.applyAsDouble(min); + if (Math.abs(yMin) <= functionValueAccuracy) { + return min; + } + + // Reduce interval if min and initial bracket the root. + if (yInitial * yMin < 0) { + return brent(func, min, initial, yMin, yInitial); + } + + // Return the second endpoint if it is good enough. + double yMax = func.applyAsDouble(max); + if (Math.abs(yMax) <= functionValueAccuracy) { + return max; + } + + // Reduce interval if initial and max bracket the root. + if (yInitial * yMax < 0) { + return brent(func, initial, max, yInitial, yMax); + } + + throw new DistributionException(DistributionException.BRACKETING, min, yMin, max, yMax); + } + + /** + * Search for a zero inside the provided interval. + * This implementation is based on the algorithm described at page 58 of + * the book + * <blockquote> + * <b>Algorithms for Minimization Without Derivatives</b>, + * <it>Richard P. Brent</it>, + * Dover 0-486-41998-3 + * </blockquote> + * + * @param func Function to solve. + * @param lo Lower bound of the search interval. + * @param hi Higher bound of the search interval. + * @param fLo Function value at the lower bound of the search interval. + * @param fHi Function value at the higher bound of the search interval. + * @return the value where the function is zero. + */ + private double brent(DoubleUnaryOperator func, + double lo, double hi, + double fLo, double fHi) { + double a = lo; + double fa = fLo; + double b = hi; + double fb = fHi; + double c = a; + double fc = fa; + double d = b - a; + double e = d; + + final double t = absoluteAccuracy; + final double eps = relativeAccuracy; + + while (true) { + if (Math.abs(fc) < Math.abs(fb)) { + a = b; + b = c; + c = a; + fa = fb; + fb = fc; + fc = fa; + } + + final double tol = 2 * eps * Math.abs(b) + t; + final double m = 0.5 * (c - b); + + if (Math.abs(m) <= tol || + Precision.equals(fb, 0)) { + return b; + } + if (Math.abs(e) < tol || + Math.abs(fa) <= Math.abs(fb)) { + // Force bisection. + d = m; + e = d; + } else { + double s = fb / fa; + double p; + double q; + // The equality test (a == c) is intentional, + // it is part of the original Brent's method and + // it should NOT be replaced by proximity test. + if (a == c) { + // Linear interpolation. + p = 2 * m * s; + q = 1 - s; + } else { + // Inverse quadratic interpolation. + q = fa / fc; + final double r = fb / fc; + p = s * (2 * m * q * (q - r) - (b - a) * (r - 1)); + q = (q - 1) * (r - 1) * (s - 1); + } + if (p > 0) { + q = -q; + } else { + p = -p; + } + s = e; + e = d; + if (p >= 1.5 * m * q - Math.abs(tol * q) || + p >= Math.abs(0.5 * s * q)) { + // Inverse quadratic interpolation gives a value + // in the wrong direction, or progress is slow. + // Fall back to bisection. + d = m; + e = d; + } else { + d = p / q; + } + } + a = b; + fa = fb; + + if (Math.abs(d) > tol) { + b += d; + } else if (m > 0) { + b += tol; + } else { + b -= tol; + } + fb = func.applyAsDouble(b); + if ((fb > 0 && fc > 0) || + (fb <= 0 && fc <= 0)) { + c = a; + fc = fa; + d = b - a; + e = d; + } + } + } + } +} http://git-wip-us.apache.org/repos/asf/commons-statistics/blob/9c794a15/commons-statistics-distribution/src/main/java/commons/statistics/distribution/AbstractDiscreteDistribution.java ---------------------------------------------------------------------- diff --git a/commons-statistics-distribution/src/main/java/commons/statistics/distribution/AbstractDiscreteDistribution.java b/commons-statistics-distribution/src/main/java/commons/statistics/distribution/AbstractDiscreteDistribution.java new file mode 100644 index 0000000..faef96c --- /dev/null +++ b/commons-statistics-distribution/src/main/java/commons/statistics/distribution/AbstractDiscreteDistribution.java @@ -0,0 +1,220 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.statistics.distribution; + +import org.apache.commons.rng.UniformRandomProvider; +import org.apache.commons.rng.sampling.distribution.InverseTransformDiscreteSampler; +import org.apache.commons.rng.sampling.distribution.DiscreteInverseCumulativeProbabilityFunction; +import org.apache.commons.rng.sampling.distribution.DiscreteSampler; + +/** + * Base class for integer-valued discrete distributions. Default + * implementations are provided for some of the methods that do not vary + * from distribution to distribution. + */ +public abstract class AbstractDiscreteDistribution + implements DiscreteDistribution { + /** + * {@inheritDoc} + * + * The default implementation uses the identity + * {@code P(x0 < X <= x1) = P(X <= x1) - P(X <= x0)} + */ + @Override + public double probability(int x0, + int x1) { + if (x1 < x0) { + throw new DistributionException(DistributionException.TOO_SMALL, + x1, x0); + } + return cumulativeProbability(x1) - cumulativeProbability(x0); + } + + /** + * {@inheritDoc} + * + * The default implementation returns + * <ul> + * <li>{@link #getSupportLowerBound()} for {@code p = 0},</li> + * <li>{@link #getSupportUpperBound()} for {@code p = 1}, and</li> + * <li>{@link #solveInverseCumulativeProbability(double, int, int)} for + * {@code 0 < p < 1}.</li> + * </ul> + */ + @Override + public int inverseCumulativeProbability(final double p) { + if (p < 0 || + p > 1) { + throw new DistributionException(DistributionException.OUT_OF_RANGE, p, 0, 1); + } + + int lower = getSupportLowerBound(); + if (p == 0.0) { + return lower; + } + if (lower == Integer.MIN_VALUE) { + if (checkedCumulativeProbability(lower) >= p) { + return lower; + } + } else { + lower -= 1; // this ensures cumulativeProbability(lower) < p, which + // is important for the solving step + } + + int upper = getSupportUpperBound(); + if (p == 1.0) { + return upper; + } + + // use the one-sided Chebyshev inequality to narrow the bracket + // cf. AbstractRealDistribution.inverseCumulativeProbability(double) + final double mu = getNumericalMean(); + final double sigma = Math.sqrt(getNumericalVariance()); + final boolean chebyshevApplies = !(Double.isInfinite(mu) || + Double.isNaN(mu) || + Double.isInfinite(sigma) || + Double.isNaN(sigma) || + sigma == 0.0); + if (chebyshevApplies) { + double k = Math.sqrt((1.0 - p) / p); + double tmp = mu - k * sigma; + if (tmp > lower) { + lower = ((int) Math.ceil(tmp)) - 1; + } + k = 1.0 / k; + tmp = mu + k * sigma; + if (tmp < upper) { + upper = ((int) Math.ceil(tmp)) - 1; + } + } + + return solveInverseCumulativeProbability(p, lower, upper); + } + + /** + * This is a utility function used by {@link + * #inverseCumulativeProbability(double)}. It assumes {@code 0 < p < 1} and + * that the inverse cumulative probability lies in the bracket {@code + * (lower, upper]}. The implementation does simple bisection to find the + * smallest {@code p}-quantile {@code inf{x in Z | P(X <= x) >= p}}. + * + * @param p Cumulative probability. + * @param lower Value satisfying {@code cumulativeProbability(lower) < p}. + * @param upper Value satisfying {@code p <= cumulativeProbability(upper)}. + * @return the smallest {@code p}-quantile of this distribution. + */ + private int solveInverseCumulativeProbability(final double p, + int lower, + int upper) { + while (lower + 1 < upper) { + int xm = (lower + upper) / 2; + if (xm < lower || xm > upper) { + /* + * Overflow. + * There will never be an overflow in both calculation methods + * for xm at the same time + */ + xm = lower + (upper - lower) / 2; + } + + double pm = checkedCumulativeProbability(xm); + if (pm >= p) { + upper = xm; + } else { + lower = xm; + } + } + return upper; + } + + /** + * Computes the cumulative probability function and checks for {@code NaN} + * values returned. Throws {@code MathInternalError} if the value is + * {@code NaN}. Rethrows any exception encountered evaluating the cumulative + * probability function. Throws {@code MathInternalError} if the cumulative + * probability function returns {@code NaN}. + * + * @param argument Input value. + * @return the cumulative probability. + * @throws IllegalStateException if the cumulative probability is {@code NaN}. + */ + private double checkedCumulativeProbability(int argument) { + final double result = cumulativeProbability(argument); + if (Double.isNaN(result)) { + throw new IllegalStateException("Internal error"); + } + return result; + } + + /** + * {@inheritDoc} + * + * The default implementation simply computes the logarithm of {@code probability(x)}. + */ + @Override + public double logProbability(int x) { + return Math.log(probability(x)); + } + + /** + * Utility function for allocating an array and filling it with {@code n} + * samples generated by the given {@code sampler}. + * + * @param n Number of samples. + * @param sampler Sampler. + * @return an array of size {@code n}. + */ + public static int[] sample(int n, + DiscreteDistribution.Sampler sampler) { + final int[] samples = new int[n]; + for (int i = 0; i < n; i++) { + samples[i] = sampler.sample(); + } + return samples; + } + + /** {@inheritDoc} */ + @Override + public DiscreteDistribution.Sampler createSampler(final UniformRandomProvider rng) { + return new DiscreteDistribution.Sampler() { + /** + * Inversion method distribution sampler. + */ + private final DiscreteSampler sampler = + new InverseTransformDiscreteSampler(rng, createICPF()); + + /** {@inheritDoc} */ + @Override + public int sample() { + return sampler.sample(); + } + }; + } + + /** + * @return an instance for use by {@link #createSampler(UniformRandomProvider)}. + */ + private DiscreteInverseCumulativeProbabilityFunction createICPF() { + return new DiscreteInverseCumulativeProbabilityFunction() { + /** {@inheritDoc} */ + @Override + public int inverseCumulativeProbability(double p) { + return AbstractDiscreteDistribution.this.inverseCumulativeProbability(p); + } + }; + } +} http://git-wip-us.apache.org/repos/asf/commons-statistics/blob/9c794a15/commons-statistics-distribution/src/main/java/commons/statistics/distribution/BetaDistribution.java ---------------------------------------------------------------------- diff --git a/commons-statistics-distribution/src/main/java/commons/statistics/distribution/BetaDistribution.java b/commons-statistics-distribution/src/main/java/commons/statistics/distribution/BetaDistribution.java new file mode 100644 index 0000000..0a07b49 --- /dev/null +++ b/commons-statistics-distribution/src/main/java/commons/statistics/distribution/BetaDistribution.java @@ -0,0 +1,202 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.statistics.distribution; + +import org.apache.commons.numbers.gamma.RegularizedBeta; +import org.apache.commons.numbers.gamma.LogGamma; +import org.apache.commons.rng.UniformRandomProvider; +import org.apache.commons.rng.sampling.distribution.ContinuousSampler; +import org.apache.commons.rng.sampling.distribution.ChengBetaSampler; + +/** + * Implementation of the <a href="http://en.wikipedia.org/wiki/Beta_distribution">Beta distribution</a>. + */ +public class BetaDistribution extends AbstractContinuousDistribution { + /** First shape parameter. */ + private final double alpha; + /** Second shape parameter. */ + private final double beta; + /** Normalizing factor used in density computations.*/ + private final double z; + + /** + * Creates a new instance. + * + * @param alpha First shape parameter (must be positive). + * @param beta Second shape parameter (must be positive). + */ + public BetaDistribution(double alpha, + double beta) { + this.alpha = alpha; + this.beta = beta; + z = LogGamma.value(alpha) + LogGamma.value(beta) - LogGamma.value(alpha + beta); + } + + /** + * Access the first shape parameter, {@code alpha}. + * + * @return the first shape parameter. + */ + public double getAlpha() { + return alpha; + } + + /** + * Access the second shape parameter, {@code beta}. + * + * @return the second shape parameter. + */ + public double getBeta() { + return beta; + } + + /** {@inheritDoc} */ + @Override + public double density(double x) { + final double logDensity = logDensity(x); + return logDensity == Double.NEGATIVE_INFINITY ? 0 : Math.exp(logDensity); + } + + /** {@inheritDoc} **/ + @Override + public double logDensity(double x) { + if (x < 0 || + x > 1) { + return Double.NEGATIVE_INFINITY; + } else if (x == 0) { + if (alpha < 1) { + throw new DistributionException(DistributionException.TOO_SMALL, + alpha, 1); + } + return Double.NEGATIVE_INFINITY; + } else if (x == 1) { + if (beta < 1) { + throw new DistributionException(DistributionException.TOO_SMALL, + beta, 1); + } + return Double.NEGATIVE_INFINITY; + } else { + double logX = Math.log(x); + double log1mX = Math.log1p(-x); + return (alpha - 1) * logX + (beta - 1) * log1mX - z; + } + } + + /** {@inheritDoc} */ + @Override + public double cumulativeProbability(double x) { + if (x <= 0) { + return 0; + } else if (x >= 1) { + return 1; + } else { + return RegularizedBeta.value(x, alpha, beta); + } + } + + /** + * {@inheritDoc} + * + * For first shape parameter {@code alpha} and second shape parameter + * {@code beta}, the mean is {@code alpha / (alpha + beta)}. + */ + @Override + public double getNumericalMean() { + final double a = getAlpha(); + return a / (a + getBeta()); + } + + /** + * {@inheritDoc} + * + * For first shape parameter {@code alpha} and second shape parameter + * {@code beta}, the variance is + * {@code (alpha * beta) / [(alpha + beta)^2 * (alpha + beta + 1)]}. + */ + @Override + public double getNumericalVariance() { + final double a = getAlpha(); + final double b = getBeta(); + final double alphabetasum = a + b; + return (a * b) / ((alphabetasum * alphabetasum) * (alphabetasum + 1)); + } + + /** + * {@inheritDoc} + * + * The lower bound of the support is always 0 no matter the parameters. + * + * @return lower bound of the support (always 0) + */ + @Override + public double getSupportLowerBound() { + return 0; + } + + /** + * {@inheritDoc} + * + * The upper bound of the support is always 1 no matter the parameters. + * + * @return upper bound of the support (always 1) + */ + @Override + public double getSupportUpperBound() { + return 1; + } + + /** + * {@inheritDoc} + * + * The support of this distribution is connected. + * + * @return {@code true} + */ + @Override + public boolean isSupportConnected() { + return true; + } + + /** + * {@inheritDoc} + * + * Sampling is performed using Cheng's algorithm: + * <blockquote> + * <pre> + * R. C. H. Cheng, + * "Generating beta variates with nonintegral shape parameters", + * Communications of the ACM, 21, 317-322, 1978. + * </pre> + * </blockquote> + */ + @Override + public ContinuousDistribution.Sampler createSampler(final UniformRandomProvider rng) { + return new ContinuousDistribution.Sampler() { + /** + * Beta distribution sampler. + */ + private final ContinuousSampler sampler = + new ChengBetaSampler(rng, alpha, beta); + + /**{@inheritDoc} */ + @Override + public double sample() { + return sampler.sample(); + } + }; + } +} http://git-wip-us.apache.org/repos/asf/commons-statistics/blob/9c794a15/commons-statistics-distribution/src/main/java/commons/statistics/distribution/BinomialDistribution.java ---------------------------------------------------------------------- diff --git a/commons-statistics-distribution/src/main/java/commons/statistics/distribution/BinomialDistribution.java b/commons-statistics-distribution/src/main/java/commons/statistics/distribution/BinomialDistribution.java new file mode 100644 index 0000000..7177968 --- /dev/null +++ b/commons-statistics-distribution/src/main/java/commons/statistics/distribution/BinomialDistribution.java @@ -0,0 +1,170 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.statistics.distribution; + +import org.apache.commons.numbers.gamma.RegularizedBeta; + +/** + * Implementation of the <a href="http://en.wikipedia.org/wiki/Binomial_distribution">binomial distribution</a>. + */ +public class BinomialDistribution extends AbstractDiscreteDistribution { + /** The number of trials. */ + private final int numberOfTrials; + /** The probability of success. */ + private final double probabilityOfSuccess; + + /** + * Creates a binomial distribution. + * + * @param trials Number of trials. + * @param p Probability of success. + * @throws IllegalArgumentException if {@code trials < 0}, or if + * {@code p < 0} or {@code p > 1}. + */ + public BinomialDistribution(int trials, + double p) { + if (trials < 0) { + throw new DistributionException(DistributionException.NEGATIVE, + trials); + } + if (p < 0 || + p > 1) { + throw new DistributionException(DistributionException.OUT_OF_RANGE, p, 0, 1); + } + + probabilityOfSuccess = p; + numberOfTrials = trials; + } + + /** + * Access the number of trials for this distribution. + * + * @return the number of trials. + */ + public int getNumberOfTrials() { + return numberOfTrials; + } + + /** + * Access the probability of success for this distribution. + * + * @return the probability of success. + */ + public double getProbabilityOfSuccess() { + return probabilityOfSuccess; + } + + /** {@inheritDoc} */ + @Override + public double probability(int x) { + final double logProbability = logProbability(x); + return logProbability == Double.NEGATIVE_INFINITY ? 0 : Math.exp(logProbability); + } + + /** {@inheritDoc} **/ + @Override + public double logProbability(int x) { + if (numberOfTrials == 0) { + return (x == 0) ? 0. : Double.NEGATIVE_INFINITY; + } + double ret; + if (x < 0 || x > numberOfTrials) { + ret = Double.NEGATIVE_INFINITY; + } else { + ret = SaddlePointExpansion.logBinomialProbability(x, + numberOfTrials, probabilityOfSuccess, + 1.0 - probabilityOfSuccess); + } + return ret; + } + + /** {@inheritDoc} */ + @Override + public double cumulativeProbability(int x) { + double ret; + if (x < 0) { + ret = 0.0; + } else if (x >= numberOfTrials) { + ret = 1.0; + } else { + ret = 1.0 - RegularizedBeta.value(probabilityOfSuccess, + x + 1.0, numberOfTrials - x); + } + return ret; + } + + /** + * {@inheritDoc} + * + * For {@code n} trials and probability parameter {@code p}, the mean is + * {@code n * p}. + */ + @Override + public double getNumericalMean() { + return numberOfTrials * probabilityOfSuccess; + } + + /** + * {@inheritDoc} + * + * For {@code n} trials and probability parameter {@code p}, the variance is + * {@code n * p * (1 - p)}. + */ + @Override + public double getNumericalVariance() { + final double p = probabilityOfSuccess; + return numberOfTrials * p * (1 - p); + } + + /** + * {@inheritDoc} + * + * The lower bound of the support is always 0 except for the probability + * parameter {@code p = 1}. + * + * @return lower bound of the support (0 or the number of trials) + */ + @Override + public int getSupportLowerBound() { + return probabilityOfSuccess < 1.0 ? 0 : numberOfTrials; + } + + /** + * {@inheritDoc} + * + * The upper bound of the support is the number of trials except for the + * probability parameter {@code p = 0}. + * + * @return upper bound of the support (number of trials or 0) + */ + @Override + public int getSupportUpperBound() { + return probabilityOfSuccess > 0.0 ? numberOfTrials : 0; + } + + /** + * {@inheritDoc} + * + * The support of this distribution is connected. + * + * @return {@code true} + */ + @Override + public boolean isSupportConnected() { + return true; + } +} http://git-wip-us.apache.org/repos/asf/commons-statistics/blob/9c794a15/commons-statistics-distribution/src/main/java/commons/statistics/distribution/CauchyDistribution.java ---------------------------------------------------------------------- diff --git a/commons-statistics-distribution/src/main/java/commons/statistics/distribution/CauchyDistribution.java b/commons-statistics-distribution/src/main/java/commons/statistics/distribution/CauchyDistribution.java new file mode 100644 index 0000000..a7b6c64 --- /dev/null +++ b/commons-statistics-distribution/src/main/java/commons/statistics/distribution/CauchyDistribution.java @@ -0,0 +1,166 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.statistics.distribution; + +/** + * Implementation of the <a href="http://en.wikipedia.org/wiki/Cauchy_distribution">Cauchy distribution</a>. + */ +public class CauchyDistribution extends AbstractContinuousDistribution { + /** The median of this distribution. */ + private final double median; + /** The scale of this distribution. */ + private final double scale; + + /** + * Creates a Cauchy distribution with the median equal to zero and scale + * equal to one. + */ + public CauchyDistribution() { + this(0, 1); + } + + /** + * Creates a distribution. + * + * @param median Median for this distribution. + * @param scale Scale parameter for this distribution. + * @throws IllegalArgumentException if {@code scale <= 0}. + */ + public CauchyDistribution(double median, + double scale) { + if (scale <= 0) { + throw new DistributionException(DistributionException.NEGATIVE, scale); + } + this.scale = scale; + this.median = median; + } + + /** {@inheritDoc} */ + @Override + public double cumulativeProbability(double x) { + return 0.5 + (Math.atan((x - median) / scale) / Math.PI); + } + + /** + * Access the median. + * + * @return the median for this distribution. + */ + public double getMedian() { + return median; + } + + /** + * Access the scale parameter. + * + * @return the scale parameter for this distribution. + */ + public double getScale() { + return scale; + } + + /** {@inheritDoc} */ + @Override + public double density(double x) { + final double dev = x - median; + return (1 / Math.PI) * (scale / (dev * dev + scale * scale)); + } + + /** + * {@inheritDoc} + * + * Returns {@code Double.NEGATIVE_INFINITY} when {@code p == 0} + * and {@code Double.POSITIVE_INFINITY} when {@code p == 1}. + */ + @Override + public double inverseCumulativeProbability(double p) { + double ret; + if (p < 0 || + p > 1) { + throw new DistributionException(DistributionException.OUT_OF_RANGE, p, 0, 1); + } else if (p == 0) { + ret = Double.NEGATIVE_INFINITY; + } else if (p == 1) { + ret = Double.POSITIVE_INFINITY; + } else { + ret = median + scale * Math.tan(Math.PI * (p - .5)); + } + return ret; + } + + /** + * {@inheritDoc} + * + * The mean is always undefined no matter the parameters. + * + * @return mean (always Double.NaN) + */ + @Override + public double getNumericalMean() { + return Double.NaN; + } + + /** + * {@inheritDoc} + * + * The variance is always undefined no matter the parameters. + * + * @return variance (always Double.NaN) + */ + @Override + public double getNumericalVariance() { + return Double.NaN; + } + + /** + * {@inheritDoc} + * + * The lower bound of the support is always negative infinity no matter + * the parameters. + * + * @return lower bound of the support (always Double.NEGATIVE_INFINITY) + */ + @Override + public double getSupportLowerBound() { + return Double.NEGATIVE_INFINITY; + } + + /** + * {@inheritDoc} + * + * The upper bound of the support is always positive infinity no matter + * the parameters. + * + * @return upper bound of the support (always Double.POSITIVE_INFINITY) + */ + @Override + public double getSupportUpperBound() { + return Double.POSITIVE_INFINITY; + } + + /** + * {@inheritDoc} + * + * The support of this distribution is connected. + * + * @return {@code true} + */ + @Override + public boolean isSupportConnected() { + return true; + } +} http://git-wip-us.apache.org/repos/asf/commons-statistics/blob/9c794a15/commons-statistics-distribution/src/main/java/commons/statistics/distribution/ChiSquaredDistribution.java ---------------------------------------------------------------------- diff --git a/commons-statistics-distribution/src/main/java/commons/statistics/distribution/ChiSquaredDistribution.java b/commons-statistics-distribution/src/main/java/commons/statistics/distribution/ChiSquaredDistribution.java new file mode 100644 index 0000000..5f31254 --- /dev/null +++ b/commons-statistics-distribution/src/main/java/commons/statistics/distribution/ChiSquaredDistribution.java @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.statistics.distribution; + +/** + * Implementation of the <a href="http://en.wikipedia.org/wiki/Chi-squared_distribution">chi-squared distribution</a>. + */ +public class ChiSquaredDistribution extends AbstractContinuousDistribution { + /** Internal Gamma distribution. */ + private final GammaDistribution gamma; + + /** + * Creates a distribution. + * + * @param degreesOfFreedom Degrees of freedom. + */ + public ChiSquaredDistribution(double degreesOfFreedom) { + gamma = new GammaDistribution(degreesOfFreedom / 2, 2); + } + + /** + * Access the number of degrees of freedom. + * + * @return the degrees of freedom. + */ + public double getDegreesOfFreedom() { + return gamma.getShape() * 2; + } + + /** {@inheritDoc} */ + @Override + public double density(double x) { + return gamma.density(x); + } + + /** {@inheritDoc} **/ + @Override + public double logDensity(double x) { + return gamma.logDensity(x); + } + + /** {@inheritDoc} */ + @Override + public double cumulativeProbability(double x) { + return gamma.cumulativeProbability(x); + } + + /** + * {@inheritDoc} + * + * For {@code k} degrees of freedom, the mean is {@code k}. + */ + @Override + public double getNumericalMean() { + return getDegreesOfFreedom(); + } + + /** + * {@inheritDoc} + * + * @return {@code 2 * k}, where {@code k} is the number of degrees of freedom. + */ + @Override + public double getNumericalVariance() { + return 2 * getDegreesOfFreedom(); + } + + /** + * {@inheritDoc} + * + * The lower bound of the support is always 0 no matter the + * degrees of freedom. + * + * @return zero. + */ + @Override + public double getSupportLowerBound() { + return 0; + } + + /** + * {@inheritDoc} + * + * The upper bound of the support is always positive infinity no matter the + * degrees of freedom. + * + * @return {@code Double.POSITIVE_INFINITY}. + */ + @Override + public double getSupportUpperBound() { + return Double.POSITIVE_INFINITY; + } + + /** + * {@inheritDoc} + * + * The support of this distribution is connected. + * + * @return {@code true} + */ + @Override + public boolean isSupportConnected() { + return true; + } +} http://git-wip-us.apache.org/repos/asf/commons-statistics/blob/9c794a15/commons-statistics-distribution/src/main/java/commons/statistics/distribution/ConstantContinuousDistribution.java ---------------------------------------------------------------------- diff --git a/commons-statistics-distribution/src/main/java/commons/statistics/distribution/ConstantContinuousDistribution.java b/commons-statistics-distribution/src/main/java/commons/statistics/distribution/ConstantContinuousDistribution.java new file mode 100644 index 0000000..54694f1 --- /dev/null +++ b/commons-statistics-distribution/src/main/java/commons/statistics/distribution/ConstantContinuousDistribution.java @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.statistics.distribution; + +import org.apache.commons.rng.UniformRandomProvider; + +/** + * Implementation of the constant real distribution. + */ +public class ConstantContinuousDistribution extends AbstractContinuousDistribution { + /** Constant value of the distribution. */ + private final double value; + + /** + * Create a constant real distribution with the given value. + * + * @param value Value of this distribution. + */ + public ConstantContinuousDistribution(double value) { + this.value = value; + } + + /** {@inheritDoc} */ + @Override + public double density(double x) { + return x == value ? 1 : 0; + } + + /** {@inheritDoc} */ + @Override + public double cumulativeProbability(double x) { + return x < value ? 0 : 1; + } + + /** {@inheritDoc} */ + @Override + public double inverseCumulativeProbability(final double p) { + if (p < 0 || + p > 1) { + throw new DistributionException(DistributionException.OUT_OF_RANGE, p, 0, 1); + } + return value; + } + + /** + * {@inheritDoc} + */ + @Override + public double getNumericalMean() { + return value; + } + + /** + * {@inheritDoc} + */ + @Override + public double getNumericalVariance() { + return 0; + } + + /** + * {@inheritDoc} + */ + @Override + public double getSupportLowerBound() { + return value; + } + + /** + * {@inheritDoc} + */ + @Override + public double getSupportUpperBound() { + return value; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean isSupportConnected() { + return true; + } + + /** + * {@inheritDoc} + * + * @param rng Not used: distribution contains a single value. + * @return the value of the distribution. + */ + @Override + public ContinuousDistribution.Sampler createSampler(final UniformRandomProvider rng) { + return new ContinuousDistribution.Sampler() { + /** {@inheritDoc} */ + @Override + public double sample() { + return value; + } + }; + } +} http://git-wip-us.apache.org/repos/asf/commons-statistics/blob/9c794a15/commons-statistics-distribution/src/main/java/commons/statistics/distribution/ContinuousDistribution.java ---------------------------------------------------------------------- diff --git a/commons-statistics-distribution/src/main/java/commons/statistics/distribution/ContinuousDistribution.java b/commons-statistics-distribution/src/main/java/commons/statistics/distribution/ContinuousDistribution.java new file mode 100644 index 0000000..b08f75a --- /dev/null +++ b/commons-statistics-distribution/src/main/java/commons/statistics/distribution/ContinuousDistribution.java @@ -0,0 +1,176 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.statistics.distribution; + +import org.apache.commons.rng.UniformRandomProvider; + +/** + * Base interface for distributions on the reals. + */ +public interface ContinuousDistribution { + /** + * For a random variable {@code X} whose values are distributed according + * to this distribution, this method returns {@code P(X = x)}. In other + * words, this method represents the probability mass function (PMF) + * for the distribution. + * + * @param x the point at which the PMF is evaluated + * @return the value of the probability mass function at point {@code x} + */ + double probability(double x); + + /** + * For a random variable {@code X} whose values are distributed according + * to this distribution, this method returns {@code P(x0 < X <= x1)}. + * + * @param x0 the exclusive lower bound + * @param x1 the inclusive upper bound + * @return the probability that a random variable with this distribution + * takes a value between {@code x0} and {@code x1}, + * excluding the lower and including the upper endpoint + * @throws IllegalArgumentException if {@code x0 > x1} + */ + double probability(double x0, double x1); + + /** + * Returns the probability density function (PDF) of this distribution + * evaluated at the specified point {@code x}. In general, the PDF is + * the derivative of the {@link #cumulativeProbability(double) CDF}. + * If the derivative does not exist at {@code x}, then an appropriate + * replacement should be returned, e.g. {@code Double.POSITIVE_INFINITY}, + * {@code Double.NaN}, or the limit inferior or limit superior of the + * difference quotient. + * + * @param x the point at which the PDF is evaluated + * @return the value of the probability density function at point {@code x} + */ + double density(double x); + + /** + * Returns the natural logarithm of the probability density function + * (PDF) of this distribution evaluated at the specified point {@code x}. + * In general, the PDF is the derivative of the {@link #cumulativeProbability(double) CDF}. + * If the derivative does not exist at {@code x}, then an appropriate replacement + * should be returned, e.g. {@code Double.POSITIVE_INFINITY}, {@code Double.NaN}, + * or the limit inferior or limit superior of the difference quotient. Note that + * due to the floating point precision and under/overflow issues, this method will + * for some distributions be more precise and faster than computing the logarithm of + * {@link #density(double)}. + * + * @param x the point at which the PDF is evaluated + * @return the logarithm of the value of the probability density function at point {@code x} + */ + double logDensity(double x); + + /** + * For a random variable {@code X} whose values are distributed according + * to this distribution, this method returns {@code P(X <= x)}. In other + * words, this method represents the (cumulative) distribution function + * (CDF) for this distribution. + * + * @param x the point at which the CDF is evaluated + * @return the probability that a random variable with this + * distribution takes a value less than or equal to {@code x} + */ + double cumulativeProbability(double x); + + /** + * Computes the quantile function of this distribution. For a random + * variable {@code X} distributed according to this distribution, the + * returned value is + * <ul> + * <li>{@code inf{x in R | P(X<=x) >= p}} for {@code 0 < p <= 1},</li> + * <li>{@code inf{x in R | P(X<=x) > 0}} for {@code p = 0}.</li> + * </ul> + * + * @param p the cumulative probability + * @return the smallest {@code p}-quantile of this distribution + * (largest 0-quantile for {@code p = 0}) + * @throws IllegalArgumentException if {@code p < 0} or {@code p > 1} + */ + double inverseCumulativeProbability(double p); + + /** + * Use this method to get the numerical value of the mean of this + * distribution. + * + * @return the mean or {@code Double.NaN} if it is not defined + */ + double getNumericalMean(); + + /** + * Use this method to get the numerical value of the variance of this + * distribution. + * + * @return the variance (possibly {@code Double.POSITIVE_INFINITY} as + * for certain cases in {@link TDistribution}) or {@code Double.NaN} if it + * is not defined + */ + double getNumericalVariance(); + + /** + * Access the lower bound of the support. This method must return the same + * value as {@code inverseCumulativeProbability(0)}. In other words, this + * method must return + * <p>{@code inf {x in R | P(X <= x) > 0}}.</p> + * + * @return lower bound of the support (might be + * {@code Double.NEGATIVE_INFINITY}) + */ + double getSupportLowerBound(); + + /** + * Access the upper bound of the support. This method must return the same + * value as {@code inverseCumulativeProbability(1)}. In other words, this + * method must return + * <p>{@code inf {x in R | P(X <= x) = 1}}.</p> + * + * @return upper bound of the support (might be + * {@code Double.POSITIVE_INFINITY}) + */ + double getSupportUpperBound(); + + /** + * Use this method to get information about whether the support is connected, + * i.e. whether all values between the lower and upper bound of the support + * are included in the support. + * + * @return whether the support is connected or not + */ + boolean isSupportConnected(); + + /** + * Creates a sampler. + * + * @param rng Generator of uniformly distributed numbers. + * @return a sampler that produces random numbers according this + * distribution. + */ + Sampler createSampler(UniformRandomProvider rng); + + /** + * Sampling functionality. + */ + interface Sampler { + /** + * Generates a random value sampled from this distribution. + * + * @return a random value. + */ + double sample(); + } +}
