[MATH-1242] fixed shuffle algorithm used by the Monte Carlo KS statistic calculation method, moved shuffle algorithm to static package-private method that is now explicitly tested by a unit test
Project: http://git-wip-us.apache.org/repos/asf/commons-math/repo Commit: http://git-wip-us.apache.org/repos/asf/commons-math/commit/cf4416a8 Tree: http://git-wip-us.apache.org/repos/asf/commons-math/tree/cf4416a8 Diff: http://git-wip-us.apache.org/repos/asf/commons-math/diff/cf4416a8 Branch: refs/heads/master Commit: cf4416a84203ff9d360d4398da3b166e6d0c72b9 Parents: 6913170 Author: Otmar Ertl <[email protected]> Authored: Fri Jul 17 20:46:53 2015 +0200 Committer: Otmar Ertl <[email protected]> Committed: Fri Jul 17 20:46:53 2015 +0200 ---------------------------------------------------------------------- .../stat/inference/KolmogorovSmirnovTest.java | 30 +++++++---- .../inference/KolmogorovSmirnovTestTest.java | 53 ++++++++++++++++++++ 2 files changed, 74 insertions(+), 9 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-math/blob/cf4416a8/src/main/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTest.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTest.java b/src/main/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTest.java index c0e7e51..26020d5 100644 --- a/src/main/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTest.java +++ b/src/main/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTest.java @@ -934,6 +934,26 @@ public class KolmogorovSmirnovTest { } /** + * Fills a boolean array randomly with a fixed number of {@code true} values. + * The method uses a simplified version of the Fisher-Yates shuffle algorithm. + * By processing first the {@code true} values followed by the remaining {@code false} values + * less random numbers need to be generated. The method is optimized for the case + * that the number of {@code true} values is larger than or equal to the number of + * {@code false} values. + * + * @param b boolean array + * @param numberOfTrueValues number of {@code true} values the boolean array should finally have + * @param rng random data generator + */ + static void fillBooleanArrayRandomlyWithFixedNumberTrueValues(final boolean[] b, final int numberOfTrueValues, final RandomGenerator rng) { + Arrays.fill(b, true); + for (int k = numberOfTrueValues; k < b.length; k++) { + final int r = rng.nextInt(k + 1); + b[(b[r]) ? r : k] = false; + } + } + + /** * Uses Monte Carlo simulation to approximate \(P(D_{n,m} > d)\) where \(D_{n,m}\) is the * 2-sample Kolmogorov-Smirnov statistic. See * {@link #kolmogorovSmirnovStatistic(double[], double[])} for the definition of \(D_{n,m}\). @@ -964,15 +984,7 @@ public class KolmogorovSmirnovTest { int tail = 0; final boolean b[] = new boolean[sum]; for (int i = 0; i < iterations; i++) { - // Shuffle n true values and m false values using Fisher-Yates shuffle algorithm. - // By processing first the n true values followed by the m false values the - // shuffle algorithm can be simplified annd requires fewer random numbers. - Arrays.fill(b, true); - for (int k = nn; k < sum; k++) { - int r = rng.nextInt(k); - b[(b[r]) ? r : k] = false; - } - + fillBooleanArrayRandomlyWithFixedNumberTrueValues(b, nn, rng); int rankN = b[0] ? 1 : 0; int rankM = b[0] ? 0 : 1; boolean previous = b[0]; http://git-wip-us.apache.org/repos/asf/commons-math/blob/cf4416a8/src/test/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTestTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTestTest.java b/src/test/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTestTest.java index 3735721..e5c03fe 100644 --- a/src/test/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTestTest.java +++ b/src/test/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTestTest.java @@ -19,10 +19,13 @@ package org.apache.commons.math4.stat.inference; import java.util.Arrays; +import org.apache.commons.math4.TestUtils; import org.apache.commons.math4.distribution.NormalDistribution; import org.apache.commons.math4.distribution.UniformRealDistribution; +import org.apache.commons.math4.random.RandomGenerator; import org.apache.commons.math4.random.Well19937c; import org.apache.commons.math4.stat.inference.KolmogorovSmirnovTest; +import org.apache.commons.math4.util.CombinatoricsUtils; import org.apache.commons.math4.util.FastMath; import org.junit.Assert; import org.junit.Test; @@ -479,6 +482,56 @@ public class KolmogorovSmirnovTestTest { Assert.assertEquals(0.015151515151515027, test.monteCarloP(d, x2.length, y2.length, false, iterations), tol); } + @Test + public void testFillBooleanArrayRandomlyWithFixedNumberTrueValues() { + + final int[][] parameters = {{5, 1}, {5, 2}, {5, 3}, {5, 4}, {8, 1}, {8, 2}, {8, 3}, {8, 4}, {8, 5}, {8, 6}, {8, 7}}; + + final double alpha = 0.001; + final int numIterations = 1000000; + + final RandomGenerator rng = new Well19937c(0); + + for (final int[] parameter : parameters) { + + final int arraySize = parameter[0]; + final int numberOfTrueValues = parameter[1]; + + final boolean[] b = new boolean[arraySize]; + final long[] counts = new long[1 << arraySize]; + + for (int i = 0; i < numIterations; ++i) { + KolmogorovSmirnovTest.fillBooleanArrayRandomlyWithFixedNumberTrueValues(b, numberOfTrueValues, rng); + int x = 0; + for (int j = 0; j < arraySize; ++j) { + x = ((x << 1) | ((b[j])?1:0)); + } + counts[x] += 1; + } + + final int numCombinations = (int) CombinatoricsUtils.binomialCoefficient(arraySize, numberOfTrueValues); + + final long[] observed = new long[numCombinations]; + final double[] expected = new double[numCombinations]; + Arrays.fill(expected, numIterations / (double) numCombinations); + + int observedIdx = 0; + + for (int i = 0; i < (1 << arraySize); ++i) { + if (Integer.bitCount(i) == numberOfTrueValues) { + observed[observedIdx] = counts[i]; + observedIdx += 1; + } + else { + Assert.assertEquals(0, counts[i]); + } + } + + Assert.assertEquals(numCombinations, observedIdx); + TestUtils.assertChiSquareAccept(expected, observed, alpha); + } + } + /** * Verifies the inequality exactP(criticalValue, n, m, true) < alpha < exactP(criticalValue, n, * m, false).
