Repository: commons-math Updated Branches: refs/heads/MATH_3_X 7a6aa92c8 -> 49b4bbd41
[MATH-1242] fixed shuffle algorithm used by the Monte Carlo KS statistic calculation method, moved shuffle algorithm to static package-private method that is now explicitly tested by a unit test Project: http://git-wip-us.apache.org/repos/asf/commons-math/repo Commit: http://git-wip-us.apache.org/repos/asf/commons-math/commit/49b4bbd4 Tree: http://git-wip-us.apache.org/repos/asf/commons-math/tree/49b4bbd4 Diff: http://git-wip-us.apache.org/repos/asf/commons-math/diff/49b4bbd4 Branch: refs/heads/MATH_3_X Commit: 49b4bbd41b116c55083300d864e824e22b7127df Parents: 7a6aa92 Author: Otmar Ertl <[email protected]> Authored: Fri Jul 17 21:43:43 2015 +0200 Committer: Otmar Ertl <[email protected]> Committed: Fri Jul 17 21:43:43 2015 +0200 ---------------------------------------------------------------------- .../stat/inference/KolmogorovSmirnovTest.java | 30 +++++++---- .../inference/KolmogorovSmirnovTestTest.java | 52 ++++++++++++++++++++ 2 files changed, 73 insertions(+), 9 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-math/blob/49b4bbd4/src/main/java/org/apache/commons/math3/stat/inference/KolmogorovSmirnovTest.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/math3/stat/inference/KolmogorovSmirnovTest.java b/src/main/java/org/apache/commons/math3/stat/inference/KolmogorovSmirnovTest.java index f32dbf3..3f868b5 100644 --- a/src/main/java/org/apache/commons/math3/stat/inference/KolmogorovSmirnovTest.java +++ b/src/main/java/org/apache/commons/math3/stat/inference/KolmogorovSmirnovTest.java @@ -934,6 +934,26 @@ public class KolmogorovSmirnovTest { } /** + * Fills a boolean array randomly with a fixed number of {@code true} values. + * The method uses a simplified version of the Fisher-Yates shuffle algorithm. + * By processing first the {@code true} values followed by the remaining {@code false} values + * less random numbers need to be generated. The method is optimized for the case + * that the number of {@code true} values is larger than or equal to the number of + * {@code false} values. + * + * @param b boolean array + * @param numberOfTrueValues number of {@code true} values the boolean array should finally have + * @param rng random data generator + */ + static void fillBooleanArrayRandomlyWithFixedNumberTrueValues(final boolean[] b, final int numberOfTrueValues, final RandomGenerator rng) { + Arrays.fill(b, true); + for (int k = numberOfTrueValues; k < b.length; k++) { + final int r = rng.nextInt(k + 1); + b[(b[r]) ? r : k] = false; + } + } + + /** * Uses Monte Carlo simulation to approximate \(P(D_{n,m} > d)\) where \(D_{n,m}\) is the * 2-sample Kolmogorov-Smirnov statistic. See * {@link #kolmogorovSmirnovStatistic(double[], double[])} for the definition of \(D_{n,m}\). @@ -963,15 +983,7 @@ public class KolmogorovSmirnovTest { int tail = 0; final boolean b[] = new boolean[sum]; for (int i = 0; i < iterations; i++) { - // Shuffle n true values and m false values using Fisher-Yates shuffle algorithm. - // By processing first the n true values followed by the m false values the - // shuffle algorithm can be simplified annd requires fewer random numbers. - Arrays.fill(b, true); - for (int k = nn; k < sum; k++) { - int r = rng.nextInt(k); - b[(b[r]) ? r : k] = false; - } - + fillBooleanArrayRandomlyWithFixedNumberTrueValues(b, nn, rng); int rankN = b[0] ? 1 : 0; int rankM = b[0] ? 0 : 1; boolean previous = b[0]; http://git-wip-us.apache.org/repos/asf/commons-math/blob/49b4bbd4/src/test/java/org/apache/commons/math3/stat/inference/KolmogorovSmirnovTestTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/math3/stat/inference/KolmogorovSmirnovTestTest.java b/src/test/java/org/apache/commons/math3/stat/inference/KolmogorovSmirnovTestTest.java index 9d0d669..808c0d4 100644 --- a/src/test/java/org/apache/commons/math3/stat/inference/KolmogorovSmirnovTestTest.java +++ b/src/test/java/org/apache/commons/math3/stat/inference/KolmogorovSmirnovTestTest.java @@ -21,7 +21,9 @@ import java.util.Arrays; import org.apache.commons.math3.distribution.NormalDistribution; import org.apache.commons.math3.distribution.UniformRealDistribution; +import org.apache.commons.math3.random.RandomGenerator; import org.apache.commons.math3.random.Well19937c; +import org.apache.commons.math3.util.CombinatoricsUtils; import org.apache.commons.math3.util.FastMath; import org.junit.Assert; import org.junit.Test; @@ -479,6 +481,56 @@ public class KolmogorovSmirnovTestTest { Assert.assertEquals(0.015151515151515027, test.monteCarloP(d, x2.length, y2.length, false, iterations), tol); } + @Test + public void testFillBooleanArrayRandomlyWithFixedNumberTrueValues() { + + final int[][] parameters = {{5, 1}, {5, 2}, {5, 3}, {5, 4}, {8, 1}, {8, 2}, {8, 3}, {8, 4}, {8, 5}, {8, 6}, {8, 7}}; + + final double alpha = 0.001; + final int numIterations = 1000000; + + final RandomGenerator rng = new Well19937c(0); + + for (final int[] parameter : parameters) { + + final int arraySize = parameter[0]; + final int numberOfTrueValues = parameter[1]; + + final boolean[] b = new boolean[arraySize]; + final long[] counts = new long[1 << arraySize]; + + for (int i = 0; i < numIterations; ++i) { + KolmogorovSmirnovTest.fillBooleanArrayRandomlyWithFixedNumberTrueValues(b, numberOfTrueValues, rng); + int x = 0; + for (int j = 0; j < arraySize; ++j) { + x = ((x << 1) | ((b[j])?1:0)); + } + counts[x] += 1; + } + + final int numCombinations = (int) CombinatoricsUtils.binomialCoefficient(arraySize, numberOfTrueValues); + + final long[] observed = new long[numCombinations]; + final double[] expected = new double[numCombinations]; + Arrays.fill(expected, numIterations / (double) numCombinations); + + int observedIdx = 0; + + for (int i = 0; i < (1 << arraySize); ++i) { + if (Integer.bitCount(i) == numberOfTrueValues) { + observed[observedIdx] = counts[i]; + observedIdx += 1; + } + else { + Assert.assertEquals(0, counts[i]); + } + } + + Assert.assertEquals(numCombinations, observedIdx); + Assert.assertFalse(TestUtils.chiSquareTest(expected, observed, alpha)); + } + } + /** * Verifies the inequality exactP(criticalValue, n, m, true) < alpha < exactP(criticalValue, n, * m, false).
