http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/jet/random/sampling/RandomSampler.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/jet/random/sampling/RandomSampler.java b/core/src/main/java/org/apache/mahout/math/jet/random/sampling/RandomSampler.java new file mode 100644 index 0000000..6804547 --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/jet/random/sampling/RandomSampler.java @@ -0,0 +1,503 @@ +/* +Copyright � 1999 CERN - European Organization for Nuclear Research. +Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose +is hereby granted without fee, provided that the above copyright notice appear in all copies and +that both that copyright notice and this permission notice appear in supporting documentation. +CERN makes no representations about the suitability of this software for any purpose. +It is provided "as is" without expressed or implied warranty. +*/ +package org.apache.mahout.math.jet.random.sampling; + +import org.apache.mahout.common.RandomUtils; + +import java.util.Random; + +/** + * Space and time efficiently computes a sorted <i>Simple Random Sample Without Replacement + * (SRSWOR)</i>, that is, a sorted set of <tt>n</tt> random numbers from an interval of <tt>N</tt> numbers; + * Example: Computing <tt>n=3</tt> random numbers from the interval <tt>[1,50]</tt> may yield + * the sorted random set <tt>(7,13,47)</tt>. + * Since we are talking about a set (sampling without replacement), no element will occur more than once. + * Each number from the <tt>N</tt> numbers has the same probability to be included in the <tt>n</tt> chosen numbers. + * + * <p><b>Problem:</b> This class solves problems including the following: <i> + * Suppose we have a file containing 10^12 objects. + * We would like to take a truly random subset of 10^6 objects and do something with it, + * for example, compute the sum over some instance field, or whatever. + * How do we choose the subset? In particular, how do we avoid multiple equal elements? + * How do we do this quick and without consuming excessive memory? + * How do we avoid slowly jumping back and forth within the file? </i> + * + * <p><b>Sorted Simple Random Sample Without Replacement (SRSWOR):</b> + * What are the exact semantics of this class? What is a SRSWOR? In which sense exactly is a returned set "random"? + * It is random in the sense, that each number from the <tt>N</tt> numbers has the + * same probability to be included in the <tt>n</tt> chosen numbers. + * For those who think in implementations rather than abstract interfaces: + * <i>Suppose, we have an empty list. + * We pick a random number between 1 and 10^12 and add it to the list only if it was not + * already picked before, i.e. if it is not already contained in the list. + * We then do the same thing again and again until we have eventually collected 10^6 distinct numbers. + * Now we sort the set ascending and return it.</i> + * <dl> + * <dt>It is exactly in this sense that this class returns "random" sets. + * <b>Note, however, that the implementation of this class uses a technique orders of magnitudes + * better (both in time and space) than the one outlined above.</b></dt></dl> + * + * <p><b>Performance:</b> Space requirements are zero. Running time is <tt>O(n)</tt> on average, + * <tt>O(N)</tt> in the worst case. + * <h2>Performance (200Mhz Pentium Pro, JDK 1.2, NT)</h2> + * <center> + * <table border="1" summary="performance table"> + * <tr> + * <td align="center" width="20%">n</td> + * <td align="center" width="20%">N</td> + * <td align="center" width="20%">Speed [seconds]</td> + * </tr> + * <tr> + * <td align="center" width="20%">10<sup>3</sup></td> + * <td align="center" width="20%">1.2*10<sup>3</sup></td> + * <td align="center" width="20">0.0014</td> + * </tr> + * <tr> + * <td align="center" width="20%">10<sup>3</sup></td> + * <td align="center" width="20%">10<sup>7</sup></td> + * <td align="center" width="20">0.006</td> + * </tr> + * <tr> + * <td align="center" width="20%">10<sup>5</sup></td> + * <td align="center" width="20%">10<sup>7</sup></td> + * <td align="center" width="20">0.7</td> + * </tr> + * <tr> + * <td align="center" width="20%">9.0*10<sup>6</sup></td> + * <td align="center" width="20%">10<sup>7</sup></td> + * <td align="center" width="20">8.5</td> + * </tr> + * <tr> + * <td align="center" width="20%">9.9*10<sup>6</sup></td> + * <td align="center" width="20%">10<sup>7</sup></td> + * <td align="center" width="20">2.0 (samples more than 95%)</td> + * </tr> + * <tr> + * <td align="center" width="20%">10<sup>4</sup></td> + * <td align="center" width="20%">10<sup>12</sup></td> + * <td align="center" width="20">0.07</td> + * </tr> + * <tr> + * <td align="center" width="20%">10<sup>7</sup></td> + * <td align="center" width="20%">10<sup>12</sup></td> + * <td align="center" width="20">60</td> + * </tr> + * </table> + * </center> + * + * <p><b>Scalability:</b> This random sampler is designed to be scalable. In iterator style, + * it is able to compute and deliver sorted random sets stepwise in units called <i>blocks</i>. + * Example: Computing <tt>n=9</tt> random numbers from the interval <tt>[1,50]</tt> in + * 3 blocks may yield the blocks <tt>(7,13,14), (27,37,42), (45,46,49)</tt>. + * (The maximum of a block is guaranteed to be less than the minimum of its successor block. + * Every block is sorted ascending. No element will ever occur twice, both within a block and among blocks.) + * A block can be computed and retrieved with method <tt>nextBlock</tt>. + * Successive calls to method <tt>nextBlock</tt> will deliver as many random numbers as required. + * + * <p>Computing and retrieving samples in blocks is useful if you need very many random + * numbers that cannot be stored in main memory at the same time. + * For example, if you want to compute 10^10 such numbers you can do this by computing + * them in blocks of, say, 500 elements each. + * You then need only space to keep one block of 500 elements (i.e. 4 KB). + * When you are finished processing the first 500 elements you call <tt>nextBlock</tt> to + * fill the next 500 elements into the block, process them, and so on. + * If you have the time and need, by using such blocks you can compute random sets + * up to <tt>n=10^19</tt> random numbers. + * + * <p>If you do not need the block feature, you can also directly call + * the static methods of this class without needing to construct a <tt>RandomSampler</tt> instance first. + * + * <p><b>Random number generation:</b> By default uses <tt>MersenneTwister</tt>, a very + * strong random number generator, much better than <tt>java.util.Random</tt>. + * You can also use other strong random number generators of Paul Houle's RngPack package. + * For example, <tt>Ranecu</tt>, <tt>Ranmar</tt> and <tt>Ranlux</tt> are strong well + * analyzed research grade pseudo-random number generators with known periods. + * + * <p><b>Implementation:</b> after J.S. Vitter, An Efficient Algorithm for Sequential Random Sampling, + * ACM Transactions on Mathematical Software, Vol 13, 1987. + * Paper available <A HREF="http://www.cs.duke.edu/~jsv"> here</A>. + */ +public final class RandomSampler { + + private RandomSampler() { + } + + /** + * Efficiently computes a sorted random set of <tt>count</tt> elements from the interval <tt>[low,low+N-1]</tt>. Since + * we are talking about a random set, no element will occur more than once. + * + * <p>Running time is <tt>O(count)</tt>, on average. Space requirements are zero. + * + * <p>Numbers are filled into the specified array starting at index <tt>fromIndex</tt> to the right. The array is + * returned sorted ascending in the range filled with numbers. + * + * @param n the total number of elements to choose (must be >= 0). + * @param N the interval to choose random numbers from is <tt>[low,low+N-1]</tt>. + * @param count the number of elements to be filled into <tt>values</tt> by this call (must be >= 0 and + * <=<tt>n</tt>). Normally, you will set <tt>count=n</tt>. + * @param low the interval to choose random numbers from is <tt>[low,low+N-1]</tt>. Hint: If + * <tt>low==0</tt>, then draws random numbers from the interval <tt>[0,N-1]</tt>. + * @param values the array into which the random numbers are to be filled; must have a length <tt>>= + * count+fromIndex</tt>. + * @param fromIndex the first index within <tt>values</tt> to be filled with numbers (inclusive). + * @param randomGenerator a random number generator. + */ + private static void rejectMethodD(long n, long N, int count, long low, long[] values, int fromIndex, + Random randomGenerator) { + /* This algorithm is applicable if a large percentage (90%..100%) of N shall be sampled. + In such cases it is more efficient than sampleMethodA() and sampleMethodD(). + The idea is that it is more efficient to express + sample(n,N,count) in terms of reject(N-n,N,count) + and then invert the result. + For example, sampling 99% turns into sampling 1% plus inversion. + + This algorithm is the same as method sampleMethodD(...) with the exception that sampled elements are rejected, + and not sampled elements included in the result set. + */ + n = N - n; // IMPORTANT !!! + + //long threshold; + long chosen = -1 + low; + + //long negalphainv = + // -13; //tuning paramter, determines when to switch from method D to method A. Dependent on programming + // language, platform, etc. + + double nreal = n; + double ninv = 1.0 / nreal; + double Nreal = N; + double Vprime = Math.exp(Math.log(randomGenerator.nextDouble()) * ninv); + long qu1 = -n + 1 + N; + double qu1real = -nreal + 1.0 + Nreal; + //threshold = -negalphainv * n; + + long S; + while (n > 1 && count > 0) { //&& threshold<N) { + double nmin1inv = 1.0 / (-1.0 + nreal); + double negSreal; + while (true) { + double X; + while (true) { // step D2: generate U and X + X = Nreal * (-Vprime + 1.0); + S = (long) X; + if (S < qu1) { + break; + } + Vprime = Math.exp(Math.log(randomGenerator.nextDouble()) * ninv); + } + double U = randomGenerator.nextDouble(); + negSreal = -S; + + //step D3: Accept? + double y1 = Math.exp(Math.log(U * Nreal / qu1real) * nmin1inv); + Vprime = y1 * (-X / Nreal + 1.0) * qu1real / (negSreal + qu1real); + if (Vprime <= 1.0) { + break; + } //break inner loop + + //step D4: Accept? + double top = -1.0 + Nreal; + long limit; + double bottom; + if (n - 1 > S) { + bottom = -nreal + Nreal; + limit = -S + N; + } else { + bottom = -1.0 + negSreal + Nreal; + limit = qu1; + } + double y2 = 1.0; + for (long t = N - 1; t >= limit; t--) { + y2 *= top / bottom; + top--; + bottom--; + } + if (Nreal / (-X + Nreal) >= y1 * Math.exp(Math.log(y2) * nmin1inv)) { + // accept ! + Vprime = Math.exp(Math.log(randomGenerator.nextDouble()) * nmin1inv); + break; //break inner loop + } + Vprime = Math.exp(Math.log(randomGenerator.nextDouble()) * ninv); + } + + //step D5: reject the (S+1)st record ! + int iter = count; //int iter = (int) (Math.min(S,count)); + if (S < iter) { + iter = (int) S; + } + + count -= iter; + while (--iter >= 0) { + values[fromIndex++] = ++chosen; + } + chosen++; + + N -= S + 1; + Nreal = negSreal - 1.0 + Nreal; + n--; + nreal--; + ninv = nmin1inv; + qu1 = -S + qu1; + qu1real = negSreal + qu1real; + //threshold += negalphainv; + } //end while + + + if (count > 0) { //special case n==1 + //reject the (S+1)st record ! + S = (long) (N * Vprime); + + int iter = count; //int iter = (int) (Math.min(S,count)); + if (S < iter) { + iter = (int) S; + } + + count -= iter; + while (--iter >= 0) { + values[fromIndex++] = ++chosen; + } + + chosen++; + + // fill the rest + while (--count >= 0) { + values[fromIndex++] = ++chosen; + } + } + } + + /** + * Efficiently computes a sorted random set of <tt>count</tt> elements from the interval <tt>[low,low+N-1]</tt>. Since + * we are talking about a random set, no element will occur more than once. + * + * <p>Running time is <tt>O(count)</tt>, on average. Space requirements are zero. + * + * <p>Numbers are filled into the specified array starting at index <tt>fromIndex</tt> to the right. The array is + * returned sorted ascending in the range filled with numbers. + * + * <p><b>Random number generation:</b> By default uses <tt>MersenneTwister</tt>, a very strong random number + * generator, much better than <tt>java.util.Random</tt>. You can also use other strong random number generators of + * Paul Houle's RngPack package. For example, <tt>Ranecu</tt>, <tt>Ranmar</tt> and <tt>Ranlux</tt> are strong well + * analyzed research grade pseudo-random number generators with known periods. + * + * @param n the total number of elements to choose (must be <tt>n >= 0</tt> and <tt>n <= N</tt>). + * @param N the interval to choose random numbers from is <tt>[low,low+N-1]</tt>. + * @param count the number of elements to be filled into <tt>values</tt> by this call (must be >= 0 and + * <=<tt>n</tt>). Normally, you will set <tt>count=n</tt>. + * @param low the interval to choose random numbers from is <tt>[low,low+N-1]</tt>. Hint: If + * <tt>low==0</tt>, then draws random numbers from the interval <tt>[0,N-1]</tt>. + * @param values the array into which the random numbers are to be filled; must have a length <tt>>= + * count+fromIndex</tt>. + * @param fromIndex the first index within <tt>values</tt> to be filled with numbers (inclusive). + * @param randomGenerator a random number generator. Set this parameter to <tt>null</tt> to use the default random + * number generator. + */ + public static void sample(long n, long N, int count, long low, long[] values, int fromIndex, + Random randomGenerator) { + if (n <= 0 || count <= 0) { + return; + } + if (count > n) { + throw new IllegalArgumentException("count must not be greater than n"); + } + if (randomGenerator == null) { + randomGenerator = RandomUtils.getRandom(); + } + + if (count == N) { // rare case treated quickly + long val = low; + int limit = fromIndex + count; + for (int i = fromIndex; i < limit; i++) { + values[i] = val++; + } + return; + } + + if (n < N * 0.95) { // || Math.min(count,N-n)>maxTmpMemoryAllowed) { + sampleMethodD(n, N, count, low, values, fromIndex, randomGenerator); + } else { // More than 95% of all numbers shall be sampled. + rejectMethodD(n, N, count, low, values, fromIndex, randomGenerator); + } + + + } + + /** + * Computes a sorted random set of <tt>count</tt> elements from the interval <tt>[low,low+N-1]</tt>. Since we are + * talking about a random set, no element will occur more than once. + * + * <p>Running time is <tt>O(N)</tt>, on average. Space requirements are zero. + * + * <p>Numbers are filled into the specified array starting at index <tt>fromIndex</tt> to the right. The array is + * returned sorted ascending in the range filled with numbers. + * + * @param n the total number of elements to choose (must be >= 0). + * @param N the interval to choose random numbers from is <tt>[low,low+N-1]</tt>. + * @param count the number of elements to be filled into <tt>values</tt> by this call (must be >= 0 and + * <=<tt>n</tt>). Normally, you will set <tt>count=n</tt>. + * @param low the interval to choose random numbers from is <tt>[low,low+N-1]</tt>. Hint: If + * <tt>low==0</tt>, then draws random numbers from the interval <tt>[0,N-1]</tt>. + * @param values the array into which the random numbers are to be filled; must have a length <tt>>= + * count+fromIndex</tt>. + * @param fromIndex the first index within <tt>values</tt> to be filled with numbers (inclusive). + * @param randomGenerator a random number generator. + */ + private static void sampleMethodA(long n, long N, int count, long low, long[] values, int fromIndex, + Random randomGenerator) { + long chosen = -1 + low; + + double top = N - n; + double Nreal = N; + long S; + while (n >= 2 && count > 0) { + double V = randomGenerator.nextDouble(); + S = 0; + double quot = top / Nreal; + while (quot > V) { + S++; + top--; + Nreal--; + quot *= top / Nreal; + } + chosen += S + 1; + values[fromIndex++] = chosen; + count--; + Nreal--; + n--; + } + + if (count > 0) { + // special case n==1 + S = (long) (Math.round(Nreal) * randomGenerator.nextDouble()); + chosen += S + 1; + values[fromIndex] = chosen; + } + } + + /** + * Efficiently computes a sorted random set of <tt>count</tt> elements from the interval <tt>[low,low+N-1]</tt>. Since + * we are talking about a random set, no element will occur more than once. + * + * <p>Running time is <tt>O(count)</tt>, on average. Space requirements are zero. + * + * <p>Numbers are filled into the specified array starting at index <tt>fromIndex</tt> to the right. The array is + * returned sorted ascending in the range filled with numbers. + * + * @param n the total number of elements to choose (must be >= 0). + * @param N the interval to choose random numbers from is <tt>[low,low+N-1]</tt>. + * @param count the number of elements to be filled into <tt>values</tt> by this call (must be >= 0 and + * <=<tt>n</tt>). Normally, you will set <tt>count=n</tt>. + * @param low the interval to choose random numbers from is <tt>[low,low+N-1]</tt>. Hint: If + * <tt>low==0</tt>, then draws random numbers from the interval <tt>[0,N-1]</tt>. + * @param values the array into which the random numbers are to be filled; must have a length <tt>>= + * count+fromIndex</tt>. + * @param fromIndex the first index within <tt>values</tt> to be filled with numbers (inclusive). + * @param randomGenerator a random number generator. + */ + private static void sampleMethodD(long n, long N, int count, long low, long[] values, int fromIndex, + Random randomGenerator) { + long chosen = -1 + low; + + double nreal = n; + double ninv = 1.0 / nreal; + double Nreal = N; + double vprime = Math.exp(Math.log(randomGenerator.nextDouble()) * ninv); + long qu1 = -n + 1 + N; + double qu1real = -nreal + 1.0 + Nreal; + long negalphainv = -13; + //tuning paramter, determines when to switch from method D to method A. Dependent on programming + // language, platform, etc. + long threshold = -negalphainv * n; + + long S; + while (n > 1 && count > 0 && threshold < N) { + double nmin1inv = 1.0 / (-1.0 + nreal); + double negSreal; + while (true) { + double X; + while (true) { // step D2: generate U and X + X = Nreal * (-vprime + 1.0); + S = (long) X; + if (S < qu1) { + break; + } + vprime = Math.exp(Math.log(randomGenerator.nextDouble()) * ninv); + } + double U = randomGenerator.nextDouble(); + negSreal = -S; + + //step D3: Accept? + double y1 = Math.exp(Math.log(U * Nreal / qu1real) * nmin1inv); + vprime = y1 * (-X / Nreal + 1.0) * qu1real / (negSreal + qu1real); + if (vprime <= 1.0) { + break; + } //break inner loop + + //step D4: Accept? + double top = -1.0 + Nreal; + long limit; + double bottom; + if (n - 1 > S) { + bottom = -nreal + Nreal; + limit = -S + N; + } else { + bottom = -1.0 + negSreal + Nreal; + limit = qu1; + } + double y2 = 1.0; + for (long t = N - 1; t >= limit; t--) { + y2 *= top / bottom; + top--; + bottom--; + } + if (Nreal / (-X + Nreal) >= y1 * Math.exp(Math.log(y2) * nmin1inv)) { + // accept ! + vprime = Math.exp(Math.log(randomGenerator.nextDouble()) * nmin1inv); + break; //break inner loop + } + vprime = Math.exp(Math.log(randomGenerator.nextDouble()) * ninv); + } + + //step D5: select the (S+1)st record ! + chosen += S + 1; + values[fromIndex++] = chosen; + /* + // invert + for (int iter=0; iter<S && count > 0; iter++) { + values[fromIndex++] = ++chosen; + count--; + } + chosen++; + */ + count--; + + N -= S + 1; + Nreal = negSreal - 1.0 + Nreal; + n--; + nreal--; + ninv = nmin1inv; + qu1 = -S + qu1; + qu1real = negSreal + qu1real; + threshold += negalphainv; + } //end while + + + if (count > 0) { + if (n > 1) { //faster to use method A to finish the sampling + sampleMethodA(n, N, count, chosen + 1, values, fromIndex, randomGenerator); + } else { + //special case n==1 + S = (long) (N * vprime); + chosen += S + 1; + values[fromIndex++] = chosen; + } + } + } + +}
http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/jet/stat/Gamma.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/jet/stat/Gamma.java b/core/src/main/java/org/apache/mahout/math/jet/stat/Gamma.java new file mode 100644 index 0000000..3ab61a6 --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/jet/stat/Gamma.java @@ -0,0 +1,681 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* +Copyright 1999 CERN - European Organization for Nuclear Research. +Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose +is hereby granted without fee, provided that the above copyright notice appear in all copies and +that both that copyright notice and this permission notice appear in supporting documentation. +CERN makes no representations about the suitability of this software for any purpose. +It is provided "as is" without expressed or implied warranty. +*/ +package org.apache.mahout.math.jet.stat; + +import org.apache.mahout.math.jet.math.Constants; +import org.apache.mahout.math.jet.math.Polynomial; + +/** Partially deprecated until unit tests are in place. Until this time, this class/interface is unsupported. */ +public final class Gamma { + + private static final double MAXSTIR = 143.01608; + + private Gamma() { + } + + /** + * Returns the beta function of the arguments. + * <pre> + * - - + * | (a) | (b) + * beta( a, b ) = -----------. + * - + * | (a+b) + * </pre> + * @param alpha + * @param beta + * @return The beta function for given values of alpha and beta. + */ + public static double beta(double alpha, double beta) { + double y; + if (alpha < 40 && beta < 40) { + y = gamma(alpha + beta); + if (y == 0.0) { + return 1.0; + } + + if (alpha > beta) { + y = gamma(alpha) / y; + y *= gamma(beta); + } else { + y = gamma(beta) / y; + y *= gamma(alpha); + } + } else { + y = Math.exp(logGamma(alpha) + logGamma(beta) - logGamma(alpha + beta)); + } + + return y; + } + + /** Returns the Gamma function of the argument. */ + public static double gamma(double x) { + + double[] pCoefficient = { + 1.60119522476751861407E-4, + 1.19135147006586384913E-3, + 1.04213797561761569935E-2, + 4.76367800457137231464E-2, + 2.07448227648435975150E-1, + 4.94214826801497100753E-1, + 9.99999999999999996796E-1 + }; + double[] qCoefficient = { + -2.31581873324120129819E-5, + 5.39605580493303397842E-4, + -4.45641913851797240494E-3, + 1.18139785222060435552E-2, + 3.58236398605498653373E-2, + -2.34591795718243348568E-1, + 7.14304917030273074085E-2, + 1.00000000000000000320E0 + }; +//double MAXGAM = 171.624376956302725; +//double LOGPI = 1.14472988584940017414; + + double p; + double z; + + double q = Math.abs(x); + + if (q > 33.0) { + if (x < 0.0) { + p = Math.floor(q); + if (p == q) { + throw new ArithmeticException("gamma: overflow"); + } + //int i = (int) p; + z = q - p; + if (z > 0.5) { + p += 1.0; + z = q - p; + } + z = q * Math.sin(Math.PI * z); + if (z == 0.0) { + throw new ArithmeticException("gamma: overflow"); + } + z = Math.abs(z); + z = Math.PI / (z * stirlingFormula(q)); + + return -z; + } else { + return stirlingFormula(x); + } + } + + z = 1.0; + while (x >= 3.0) { + x -= 1.0; + z *= x; + } + + while (x < 0.0) { + if (x == 0.0) { + throw new ArithmeticException("gamma: singular"); + } + if (x > -1.0e-9) { + return z / ((1.0 + 0.5772156649015329 * x) * x); + } + z /= x; + x += 1.0; + } + + while (x < 2.0) { + if (x == 0.0) { + throw new ArithmeticException("gamma: singular"); + } + if (x < 1.0e-9) { + return z / ((1.0 + 0.5772156649015329 * x) * x); + } + z /= x; + x += 1.0; + } + + if ((x == 2.0) || (x == 3.0)) { + return z; + } + + x -= 2.0; + p = Polynomial.polevl(x, pCoefficient, 6); + q = Polynomial.polevl(x, qCoefficient, 7); + return z * p / q; + + } + + /** + * Returns the regularized Incomplete Beta Function evaluated from zero to <tt>xx</tt>; formerly named <tt>ibeta</tt>. + * + * See http://en.wikipedia.org/wiki/Incomplete_beta_function#Incomplete_beta_function + * + * @param alpha the alpha parameter of the beta distribution. + * @param beta the beta parameter of the beta distribution. + * @param xx the integration end point. + */ + public static double incompleteBeta(double alpha, double beta, double xx) { + + if (alpha <= 0.0) { + throw new ArithmeticException("incompleteBeta: Domain error! alpha must be > 0, but was " + alpha); + } + + if (beta <= 0.0) { + throw new ArithmeticException("incompleteBeta: Domain error! beta must be > 0, but was " + beta); + } + + if (xx <= 0.0) { + return 0.0; + } + + if (xx >= 1.0) { + return 1.0; + } + + double t; + if ((beta * xx) <= 1.0 && xx <= 0.95) { + t = powerSeries(alpha, beta, xx); + return t; + } + + double w = 1.0 - xx; + + /* Reverse a and b if x is greater than the mean. */ + double xc; + double x; + double b; + double a; + boolean flag = false; + if (xx > (alpha / (alpha + beta))) { + flag = true; + a = beta; + b = alpha; + xc = xx; + x = w; + } else { + a = alpha; + b = beta; + xc = w; + x = xx; + } + + if (flag && (b * x) <= 1.0 && x <= 0.95) { + t = powerSeries(a, b, x); + t = t <= Constants.MACHEP ? 1.0 - Constants.MACHEP : 1.0 - t; + return t; + } + + /* Choose expansion for better convergence. */ + double y = x * (a + b - 2.0) - (a - 1.0); + w = y < 0.0 ? incompleteBetaFraction1(a, b, x) : incompleteBetaFraction2(a, b, x) / xc; + + /* Multiply w by the factor + a b _ _ _ + x (1-x) | (a+b) / ( a | (a) | (b) ) . */ + + y = a * Math.log(x); + t = b * Math.log(xc); + if ((a + b) < Constants.MAXGAM && Math.abs(y) < Constants.MAXLOG && Math.abs(t) < Constants.MAXLOG) { + t = Math.pow(xc, b); + t *= Math.pow(x, a); + t /= a; + t *= w; + t *= gamma(a + b) / (gamma(a) * gamma(b)); + if (flag) { + t = t <= Constants.MACHEP ? 1.0 - Constants.MACHEP : 1.0 - t; + } + return t; + } + /* Resort to logarithms. */ + y += t + logGamma(a + b) - logGamma(a) - logGamma(b); + y += Math.log(w / a); + t = y < Constants.MINLOG ? 0.0 : Math.exp(y); + + if (flag) { + t = t <= Constants.MACHEP ? 1.0 - Constants.MACHEP : 1.0 - t; + } + return t; + } + + /** Continued fraction expansion #1 for incomplete beta integral; formerly named <tt>incbcf</tt>. */ + static double incompleteBetaFraction1(double a, double b, double x) { + + double k1 = a; + double k2 = a + b; + double k3 = a; + double k4 = a + 1.0; + double k5 = 1.0; + double k6 = b - 1.0; + double k7 = k4; + double k8 = a + 2.0; + + double pkm2 = 0.0; + double qkm2 = 1.0; + double pkm1 = 1.0; + double qkm1 = 1.0; + double ans = 1.0; + double r = 1.0; + int n = 0; + double thresh = 3.0 * Constants.MACHEP; + do { + double xk = -(x * k1 * k2) / (k3 * k4); + double pk = pkm1 + pkm2 * xk; + double qk = qkm1 + qkm2 * xk; + pkm2 = pkm1; + pkm1 = pk; + qkm2 = qkm1; + qkm1 = qk; + + xk = (x * k5 * k6) / (k7 * k8); + pk = pkm1 + pkm2 * xk; + qk = qkm1 + qkm2 * xk; + pkm2 = pkm1; + pkm1 = pk; + qkm2 = qkm1; + qkm1 = qk; + + if (qk != 0) { + r = pk / qk; + } + double t; + if (r != 0) { + t = Math.abs((ans - r) / r); + ans = r; + } else { + t = 1.0; + } + + if (t < thresh) { + return ans; + } + + k1 += 1.0; + k2 += 1.0; + k3 += 2.0; + k4 += 2.0; + k5 += 1.0; + k6 -= 1.0; + k7 += 2.0; + k8 += 2.0; + + if ((Math.abs(qk) + Math.abs(pk)) > Constants.BIG) { + pkm2 *= Constants.BIG_INVERSE; + pkm1 *= Constants.BIG_INVERSE; + qkm2 *= Constants.BIG_INVERSE; + qkm1 *= Constants.BIG_INVERSE; + } + if ((Math.abs(qk) < Constants.BIG_INVERSE) || (Math.abs(pk) < Constants.BIG_INVERSE)) { + pkm2 *= Constants.BIG; + pkm1 *= Constants.BIG; + qkm2 *= Constants.BIG; + qkm1 *= Constants.BIG; + } + } while (++n < 300); + + return ans; + } + + /** Continued fraction expansion #2 for incomplete beta integral; formerly named <tt>incbd</tt>. */ + static double incompleteBetaFraction2(double a, double b, double x) { + + double k1 = a; + double k2 = b - 1.0; + double k3 = a; + double k4 = a + 1.0; + double k5 = 1.0; + double k6 = a + b; + double k7 = a + 1.0; + double k8 = a + 2.0; + + double pkm2 = 0.0; + double qkm2 = 1.0; + double pkm1 = 1.0; + double qkm1 = 1.0; + double z = x / (1.0 - x); + double ans = 1.0; + double r = 1.0; + int n = 0; + double thresh = 3.0 * Constants.MACHEP; + do { + double xk = -(z * k1 * k2) / (k3 * k4); + double pk = pkm1 + pkm2 * xk; + double qk = qkm1 + qkm2 * xk; + pkm2 = pkm1; + pkm1 = pk; + qkm2 = qkm1; + qkm1 = qk; + + xk = (z * k5 * k6) / (k7 * k8); + pk = pkm1 + pkm2 * xk; + qk = qkm1 + qkm2 * xk; + pkm2 = pkm1; + pkm1 = pk; + qkm2 = qkm1; + qkm1 = qk; + + if (qk != 0) { + r = pk / qk; + } + double t; + if (r != 0) { + t = Math.abs((ans - r) / r); + ans = r; + } else { + t = 1.0; + } + + if (t < thresh) { + return ans; + } + + k1 += 1.0; + k2 -= 1.0; + k3 += 2.0; + k4 += 2.0; + k5 += 1.0; + k6 += 1.0; + k7 += 2.0; + k8 += 2.0; + + if ((Math.abs(qk) + Math.abs(pk)) > Constants.BIG) { + pkm2 *= Constants.BIG_INVERSE; + pkm1 *= Constants.BIG_INVERSE; + qkm2 *= Constants.BIG_INVERSE; + qkm1 *= Constants.BIG_INVERSE; + } + if ((Math.abs(qk) < Constants.BIG_INVERSE) || (Math.abs(pk) < Constants.BIG_INVERSE)) { + pkm2 *= Constants.BIG; + pkm1 *= Constants.BIG; + qkm2 *= Constants.BIG; + qkm1 *= Constants.BIG; + } + } while (++n < 300); + + return ans; + } + + /** + * Returns the Incomplete Gamma function; formerly named <tt>igamma</tt>. + * + * @param alpha the shape parameter of the gamma distribution. + * @param x the integration end point. + * @return The value of the unnormalized incomplete gamma function. + */ + public static double incompleteGamma(double alpha, double x) { + if (x <= 0 || alpha <= 0) { + return 0.0; + } + + if (x > 1.0 && x > alpha) { + return 1.0 - incompleteGammaComplement(alpha, x); + } + + /* Compute x**a * exp(-x) / gamma(a) */ + double ax = alpha * Math.log(x) - x - logGamma(alpha); + if (ax < -Constants.MAXLOG) { + return 0.0; + } + + ax = Math.exp(ax); + + /* power series */ + double r = alpha; + double c = 1.0; + double ans = 1.0; + + do { + r += 1.0; + c *= x / r; + ans += c; + } + while (c / ans > Constants.MACHEP); + + return ans * ax / alpha; + + } + + /** + * Returns the Complemented Incomplete Gamma function; formerly named <tt>igamc</tt>. + * + * @param alpha the shape parameter of the gamma distribution. + * @param x the integration start point. + */ + public static double incompleteGammaComplement(double alpha, double x) { + + if (x <= 0 || alpha <= 0) { + return 1.0; + } + + if (x < 1.0 || x < alpha) { + return 1.0 - incompleteGamma(alpha, x); + } + + double ax = alpha * Math.log(x) - x - logGamma(alpha); + if (ax < -Constants.MAXLOG) { + return 0.0; + } + + ax = Math.exp(ax); + + /* continued fraction */ + double y = 1.0 - alpha; + double z = x + y + 1.0; + double c = 0.0; + double pkm2 = 1.0; + double qkm2 = x; + double pkm1 = x + 1.0; + double qkm1 = z * x; + double ans = pkm1 / qkm1; + + double t; + do { + c += 1.0; + y += 1.0; + z += 2.0; + double yc = y * c; + double pk = pkm1 * z - pkm2 * yc; + double qk = qkm1 * z - qkm2 * yc; + if (qk != 0) { + double r = pk / qk; + t = Math.abs((ans - r) / r); + ans = r; + } else { + t = 1.0; + } + + pkm2 = pkm1; + pkm1 = pk; + qkm2 = qkm1; + qkm1 = qk; + if (Math.abs(pk) > Constants.BIG) { + pkm2 *= Constants.BIG_INVERSE; + pkm1 *= Constants.BIG_INVERSE; + qkm2 *= Constants.BIG_INVERSE; + qkm1 *= Constants.BIG_INVERSE; + } + } while (t > Constants.MACHEP); + + return ans * ax; + } + + /** Returns the natural logarithm of the gamma function; formerly named <tt>lgamma</tt>. */ + public static double logGamma(double x) { + double p; + double q; + double z; + + double[] aCoefficient = { + 8.11614167470508450300E-4, + -5.95061904284301438324E-4, + 7.93650340457716943945E-4, + -2.77777777730099687205E-3, + 8.33333333333331927722E-2 + }; + double[] bCoefficient = { + -1.37825152569120859100E3, + -3.88016315134637840924E4, + -3.31612992738871184744E5, + -1.16237097492762307383E6, + -1.72173700820839662146E6, + -8.53555664245765465627E5 + }; + double[] cCoefficient = { + /* 1.00000000000000000000E0, */ + -3.51815701436523470549E2, + -1.70642106651881159223E4, + -2.20528590553854454839E5, + -1.13933444367982507207E6, + -2.53252307177582951285E6, + -2.01889141433532773231E6 + }; + + if (x < -34.0) { + q = -x; + double w = logGamma(q); + p = Math.floor(q); + if (p == q) { + throw new ArithmeticException("lgam: Overflow"); + } + z = q - p; + if (z > 0.5) { + p += 1.0; + z = p - q; + } + z = q * Math.sin(Math.PI * z); + if (z == 0.0) { + throw new + ArithmeticException("lgamma: Overflow"); + } + z = Constants.LOGPI - Math.log(z) - w; + return z; + } + + if (x < 13.0) { + z = 1.0; + while (x >= 3.0) { + x -= 1.0; + z *= x; + } + while (x < 2.0) { + if (x == 0.0) { + throw new ArithmeticException("lgamma: Overflow"); + } + z /= x; + x += 1.0; + } + if (z < 0.0) { + z = -z; + } + if (x == 2.0) { + return Math.log(z); + } + x -= 2.0; + p = x * Polynomial.polevl(x, bCoefficient, 5) / Polynomial.p1evl(x, cCoefficient, 6); + return Math.log(z) + p; + } + + if (x > 2.556348e305) { + throw new ArithmeticException("lgamma: Overflow"); + } + + q = (x - 0.5) * Math.log(x) - x + 0.91893853320467274178; + //if ( x > 1.0e8 ) return( q ); + if (x > 1.0e8) { + return q; + } + + p = 1.0 / (x * x); + if (x >= 1000.0) { + q += ((7.9365079365079365079365e-4 * p + - 2.7777777777777777777778e-3) * p + + 0.0833333333333333333333) / x; + } else { + q += Polynomial.polevl(p, aCoefficient, 4) / x; + } + return q; + } + + /** + * Power series for incomplete beta integral; formerly named <tt>pseries</tt>. Use when b*x is small and x not too + * close to 1. + */ + private static double powerSeries(double a, double b, double x) { + + double ai = 1.0 / a; + double u = (1.0 - b) * x; + double v = u / (a + 1.0); + double t1 = v; + double t = u; + double n = 2.0; + double s = 0.0; + double z = Constants.MACHEP * ai; + while (Math.abs(v) > z) { + u = (n - b) * x / n; + t *= u; + v = t / (a + n); + s += v; + n += 1.0; + } + s += t1; + s += ai; + + u = a * Math.log(x); + if ((a + b) < Constants.MAXGAM && Math.abs(u) < Constants.MAXLOG) { + t = gamma(a + b) / (gamma(a) * gamma(b)); + s *= t * Math.pow(x, a); + } else { + t = logGamma(a + b) - logGamma(a) - logGamma(b) + u + Math.log(s); + s = t < Constants.MINLOG ? 0.0 : Math.exp(t); + } + return s; + } + + /** + * Returns the Gamma function computed by Stirling's formula; formerly named <tt>stirf</tt>. The polynomial STIR is + * valid for 33 <= x <= 172. + */ + static double stirlingFormula(double x) { + double[] coefficients = { + 7.87311395793093628397E-4, + -2.29549961613378126380E-4, + -2.68132617805781232825E-3, + 3.47222221605458667310E-3, + 8.33333333333482257126E-2, + }; + + double w = 1.0 / x; + double y = Math.exp(x); + + w = 1.0 + w * Polynomial.polevl(w, coefficients, 4); + + if (x > MAXSTIR) { + /* Avoid overflow in Math.pow() */ + double v = Math.pow(x, 0.5 * x - 0.25); + y = v * (v / y); + } else { + y = Math.pow(x, x - 0.5) / y; + } + y = Constants.SQTPI * y * w; + return y; + } +} http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/jet/stat/Probability.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/jet/stat/Probability.java b/core/src/main/java/org/apache/mahout/math/jet/stat/Probability.java new file mode 100644 index 0000000..bcd1a86 --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/jet/stat/Probability.java @@ -0,0 +1,203 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* +Copyright 1999 CERN - European Organization for Nuclear Research. +Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose +is hereby granted without fee, provided that the above copyright notice appear in all copies and +that both that copyright notice and this permission notice appear in supporting documentation. +CERN makes no representations about the suitability of this software for any purpose. +It is provided "as is" without expressed or implied warranty. +*/ +package org.apache.mahout.math.jet.stat; + +import org.apache.mahout.math.jet.random.Normal; + +/** Partially deprecated until unit tests are in place. Until this time, this class/interface is unsupported. */ +public final class Probability { + + private static final Normal UNIT_NORMAL = new Normal(0, 1, null); + + private Probability() { + } + + /** + * Returns the area from zero to <tt>x</tt> under the beta density function. + * <pre> + * x + * - - + * | (a+b) | | a-1 b-1 + * P(x) = ---------- | t (1-t) dt + * - - | | + * | (a) | (b) - + * 0 + * </pre> + * This function is identical to the incomplete beta integral function <tt>Gamma.incompleteBeta(a, b, x)</tt>. + * + * The complemented function is + * + * <tt>1 - P(1-x) = Gamma.incompleteBeta( b, a, x )</tt>; + */ + public static double beta(double a, double b, double x) { + return Gamma.incompleteBeta(a, b, x); + } + + /** + * Returns the integral from zero to <tt>x</tt> of the gamma probability density function. + * <pre> + * + * alpha - x + * beta | alpha-1 -beta t + * y = --------- | t e dt + * - | + * | (alpha) - 0 + * </pre> + * The incomplete gamma integral is used, according to the relation + * + * <tt>y = Gamma.incompleteGamma( alpha, beta*x )</tt>. + * + * See http://en.wikipedia.org/wiki/Gamma_distribution#Probability_density_function + * + * @param alpha the shape parameter of the gamma distribution. + * @param beta the rate parameter of the gamma distribution. + * @param x integration end point. + */ + public static double gamma(double alpha, double beta, double x) { + if (x < 0.0) { + return 0.0; + } + return Gamma.incompleteGamma(alpha, beta * x); + } + + /** + * Returns the sum of the terms <tt>0</tt> through <tt>k</tt> of the Negative Binomial Distribution. + * {@code + * k + * -- ( n+j-1 ) n j + * > ( ) p (1-p) + * -- ( j ) + * j=0 + * } + * In a sequence of Bernoulli trials, this is the probability that <tt>k</tt> or fewer failures precede the + * <tt>n</tt>-th success. <p> The terms are not computed individually; instead the incomplete beta integral is + * employed, according to the formula <p> <tt>y = negativeBinomial( k, n, p ) = Gamma.incompleteBeta( n, k+1, p + * )</tt>. + * + * All arguments must be positive, + * + * @param k end term. + * @param n the number of trials. + * @param p the probability of success (must be in <tt>(0.0,1.0)</tt>). + */ + public static double negativeBinomial(int k, int n, double p) { + if (p < 0.0 || p > 1.0) { + throw new IllegalArgumentException(); + } + if (k < 0) { + return 0.0; + } + + return Gamma.incompleteBeta(n, k + 1, p); + } + + /** + * Returns the area under the Normal (Gaussian) probability density function, integrated from minus infinity to + * <tt>x</tt> (assumes mean is zero, variance is one). + * {@code + * x + * - + * 1 | | 2 + * normal(x) = --------- | exp( - t /2 ) dt + * sqrt(2pi) | | + * - + * -inf. + * + * = ( 1 + erf(z) ) / 2 + * = erfc(z) / 2 + * } + * where <tt>z = x/sqrt(2)</tt>. Computation is via the functions <tt>errorFunction</tt> and + * <tt>errorFunctionComplement</tt>. + * <p> + * Computed using method 26.2.17 from Abramovitz and Stegun (see http://www.math.sfu.ca/~cbm/aands/page_932.htm + * and http://en.wikipedia.org/wiki/Normal_distribution#Numerical_approximations_of_the_normal_cdf + */ + + public static double normal(double a) { + if (a < 0) { + return 1 - normal(-a); + } + double b0 = 0.2316419; + double b1 = 0.319381530; + double b2 = -0.356563782; + double b3 = 1.781477937; + double b4 = -1.821255978; + double b5 = 1.330274429; + double t = 1 / (1 + b0 * a); + return 1 - UNIT_NORMAL.pdf(a) * t * (b1 + t * (b2 + t * (b3 + t * (b4 + t * b5)))); + } + + /** + * Returns the area under the Normal (Gaussian) probability density function, integrated from minus infinity to + * <tt>x</tt>. + * {@code + * x + * - + * 1 | | 2 + * normal(x) = --------- | exp( - (t-mean) / 2v ) dt + * sqrt(2pi*v)| | + * - + * -inf. + * + * } + * where <tt>v = variance</tt>. Computation is via the functions <tt>errorFunction</tt>. + * + * @param mean the mean of the normal distribution. + * @param variance the variance of the normal distribution. + * @param x the integration limit. + */ + public static double normal(double mean, double variance, double x) { + return normal((x - mean) / Math.sqrt(variance)); + } + + /** + * Returns the sum of the first <tt>k</tt> terms of the Poisson distribution. + * <pre> + * k j + * -- -m m + * > e -- + * -- j! + * j=0 + * </pre> + * The terms are not summed directly; instead the incomplete gamma integral is employed, according to the relation <p> + * <tt>y = poisson( k, m ) = Gamma.incompleteGammaComplement( k+1, m )</tt>. + * + * The arguments must both be positive. + * + * @param k number of terms. + * @param mean the mean of the poisson distribution. + */ + public static double poisson(int k, double mean) { + if (mean < 0) { + throw new IllegalArgumentException(); + } + if (k < 0) { + return 0.0; + } + return Gamma.incompleteGammaComplement(k + 1, mean); + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/jet/stat/package-info.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/jet/stat/package-info.java b/core/src/main/java/org/apache/mahout/math/jet/stat/package-info.java new file mode 100644 index 0000000..1d4d7bd --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/jet/stat/package-info.java @@ -0,0 +1,5 @@ +/** + * Tools for basic and advanced statistics: Estimators, Gamma functions, Beta functions, Probabilities, + * Special integrals, etc. + */ +package org.apache.mahout.math.jet.stat; http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/list/AbstractList.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/list/AbstractList.java b/core/src/main/java/org/apache/mahout/math/list/AbstractList.java new file mode 100644 index 0000000..c672f40 --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/list/AbstractList.java @@ -0,0 +1,247 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + /* +Copyright � 1999 CERN - European Organization for Nuclear Research. +Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose +is hereby granted without fee, provided that the above copyright notice appear in all copies and +that both that copyright notice and this permission notice appear in supporting documentation. +CERN makes no representations about the suitability of this software for any purpose. +It is provided "as is" without expressed or implied warranty. +*/ +/* +Copyright 1999 CERN - European Organization for Nuclear Research. +Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose +is hereby granted without fee, provided that the above copyright notice appear in all copies and +that both that copyright notice and this permission notice appear in supporting documentation. +CERN makes no representations about the suitability of this software for any purpose. +It is provided "as is" without expressed or implied warranty. +*/ +package org.apache.mahout.math.list; + +import org.apache.mahout.math.PersistentObject; + +/** + * Abstract base class for resizable lists holding objects or primitive data types such as + * {@code int}, {@code float}, etc. + * First see the <a href="package-summary.html">package summary</a> and javadoc + * <a href="package-tree.html">tree view</a> to get the broad picture. + * <p> + * <b>Note that this implementation is not synchronized.</b> + * + * @author [email protected] + * @version 1.0, 09/24/99 + * @see java.util.ArrayList + * @see java.util.Vector + * @see java.util.Arrays + */ +public abstract class AbstractList extends PersistentObject { + + public abstract int size(); + + public boolean isEmpty() { + return size() == 0; + } + + /** + * Inserts <tt>length</tt> dummy elements before the specified position into the receiver. Shifts the element + * currently at that position (if any) and any subsequent elements to the right. <b>This method must set the new size + * to be <tt>size()+length</tt></b>. + * + * @param index index before which to insert dummy elements (must be in [0,size]).. + * @param length number of dummy elements to be inserted. + * @throws IndexOutOfBoundsException if <tt>index < 0 || index > size()</tt>. + */ + protected abstract void beforeInsertDummies(int index, int length); + + /** Checks if the given index is in range. */ + protected static void checkRange(int index, int theSize) { + if (index >= theSize || index < 0) { + throw new IndexOutOfBoundsException("Index: " + index + ", Size: " + theSize); + } + } + + /** + * Checks if the given range is within the contained array's bounds. + * + * @throws IndexOutOfBoundsException if <tt>to!=from-1 || from<0 || from>to || to>=size()</tt>. + */ + protected static void checkRangeFromTo(int from, int to, int theSize) { + if (to == from - 1) { + return; + } + if (from < 0 || from > to || to >= theSize) { + throw new IndexOutOfBoundsException("from: " + from + ", to: " + to + ", size=" + theSize); + } + } + + /** + * Removes all elements from the receiver. The receiver will be empty after this call returns, but keep its current + * capacity. + */ + public void clear() { + removeFromTo(0, size() - 1); + } + + /** + * Sorts the receiver into ascending order. This sort is guaranteed to be <i>stable</i>: equal elements will not be + * reordered as a result of the sort.<p> + * + * The sorting algorithm is a modified mergesort (in which the merge is omitted if the highest element in the low + * sublist is less than the lowest element in the high sublist). This algorithm offers guaranteed n*log(n) + * performance, and can approach linear performance on nearly sorted lists. + * + * <p><b>You should never call this method unless you are sure that this particular sorting algorithm is the right one + * for your data set.</b> It is generally better to call <tt>sort()</tt> or <tt>sortFromTo(...)</tt> instead, because + * those methods automatically choose the best sorting algorithm. + */ + public final void mergeSort() { + mergeSortFromTo(0, size() - 1); + } + + /** + * Sorts the receiver into ascending order. This sort is guaranteed to be <i>stable</i>: equal elements will not be + * reordered as a result of the sort.<p> + * + * The sorting algorithm is a modified mergesort (in which the merge is omitted if the highest element in the low + * sublist is less than the lowest element in the high sublist). This algorithm offers guaranteed n*log(n) + * performance, and can approach linear performance on nearly sorted lists. + * + * <p><b>You should never call this method unless you are sure that this particular sorting algorithm is the right one + * for your data set.</b> It is generally better to call <tt>sort()</tt> or <tt>sortFromTo(...)</tt> instead, because + * those methods automatically choose the best sorting algorithm. + * + * @param from the index of the first element (inclusive) to be sorted. + * @param to the index of the last element (inclusive) to be sorted. + * @throws IndexOutOfBoundsException if <tt>(from<0 || from>to || to>=size()) && to!=from-1</tt>. + */ + public abstract void mergeSortFromTo(int from, int to); + + /** + * Sorts the receiver into ascending order. The sorting algorithm is a tuned quicksort, adapted from Jon L. Bentley + * and M. Douglas McIlroy's "Engineering a Sort Function", Software-Practice and Experience, Vol. 23(11) P. 1249-1265 + * (November 1993). This algorithm offers n*log(n) performance on many data sets that cause other quicksorts to + * degrade to quadratic performance. + * + * <p><b>You should never call this method unless you are sure that this particular sorting algorithm is the right one + * for your data set.</b> It is generally better to call <tt>sort()</tt> or <tt>sortFromTo(...)</tt> instead, because + * those methods automatically choose the best sorting algorithm. + */ + public final void quickSort() { + quickSortFromTo(0, size() - 1); + } + + /** + * Sorts the specified range of the receiver into ascending order. The sorting algorithm is a tuned quicksort, + * adapted from Jon L. Bentley and M. Douglas McIlroy's "Engineering a Sort Function", Software-Practice and + * Experience, Vol. 23(11) P. 1249-1265 (November 1993). This algorithm offers n*log(n) performance on many data sets + * that cause other quicksorts to degrade to quadratic performance. + * + * <p><b>You should never call this method unless you are sure that this particular sorting algorithm is the right one + * for your data set.</b> It is generally better to call <tt>sort()</tt> or <tt>sortFromTo(...)</tt> instead, because + * those methods automatically choose the best sorting algorithm. + * + * @param from the index of the first element (inclusive) to be sorted. + * @param to the index of the last element (inclusive) to be sorted. + * @throws IndexOutOfBoundsException if <tt>(from<0 || from>to || to>=size()) && to!=from-1</tt>. + */ + public abstract void quickSortFromTo(int from, int to); + + /** + * Removes the element at the specified position from the receiver. Shifts any subsequent elements to the left. + * + * @param index the index of the element to removed. + * @throws IndexOutOfBoundsException if <tt>index < 0 || index >= size()</tt>. + */ + public void remove(int index) { + removeFromTo(index, index); + } + + /** + * Removes from the receiver all elements whose index is between <code>from</code>, inclusive and <code>to</code>, + * inclusive. Shifts any succeeding elements to the left (reduces their index). This call shortens the list by + * <tt>(to - from + 1)</tt> elements. + * + * @param fromIndex index of first element to be removed. + * @param toIndex index of last element to be removed. + * @throws IndexOutOfBoundsException if <tt>(from<0 || from>to || to>=size()) && to!=from-1</tt>. + */ + public abstract void removeFromTo(int fromIndex, int toIndex); + + /** Reverses the elements of the receiver. Last becomes first, second last becomes second first, and so on. */ + public abstract void reverse(); + + /** + * Sets the size of the receiver. If the new size is greater than the current size, new null or zero items are added + * to the end of the receiver. If the new size is less than the current size, all components at index newSize and + * greater are discarded. This method does not release any superfluos internal memory. Use method <tt>trimToSize</tt> + * to release superfluos internal memory. + * + * @param newSize the new size of the receiver. + * @throws IndexOutOfBoundsException if <tt>newSize < 0</tt>. + */ + public void setSize(int newSize) { + if (newSize < 0) { + throw new IndexOutOfBoundsException("newSize:" + newSize); + } + + int currentSize = size(); + if (newSize != currentSize) { + if (newSize > currentSize) { + beforeInsertDummies(currentSize, newSize - currentSize); + } else if (newSize < currentSize) { + removeFromTo(newSize, currentSize - 1); + } + } + } + + /** + * Sorts the receiver into ascending order. + * + * The sorting algorithm is dynamically chosen according to the characteristics of the data set. + * + * This implementation simply calls <tt>sortFromTo(...)</tt>. Override <tt>sortFromTo(...)</tt> if you can determine + * which sort is most appropriate for the given data set. + */ + public final void sort() { + sortFromTo(0, size() - 1); + } + + /** + * Sorts the specified range of the receiver into ascending order. + * + * The sorting algorithm is dynamically chosen according to the characteristics of the data set. This default + * implementation simply calls quickSort. Override this method if you can determine which sort is most appropriate for + * the given data set. + * + * @param from the index of the first element (inclusive) to be sorted. + * @param to the index of the last element (inclusive) to be sorted. + * @throws IndexOutOfBoundsException if <tt>(from<0 || from>to || to>=size()) && to!=from-1</tt>. + */ + public void sortFromTo(int from, int to) { + quickSortFromTo(from, to); + } + + /** + * Trims the capacity of the receiver to be the receiver's current size. Releases any superfluos internal memory. An + * application can use this operation to minimize the storage of the receiver. <p> This default implementation does + * nothing. Override this method in space efficient implementations. + */ + public void trimToSize() { + } +} http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/list/AbstractObjectList.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/list/AbstractObjectList.java b/core/src/main/java/org/apache/mahout/math/list/AbstractObjectList.java new file mode 100644 index 0000000..a1a5899 --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/list/AbstractObjectList.java @@ -0,0 +1,80 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* +Copyright 1999 CERN - European Organization for Nuclear Research. +Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose +is hereby granted without fee, provided that the above copyright notice appear in all copies and +that both that copyright notice and this permission notice appear in supporting documentation. +CERN makes no representations about the suitability of this software for any purpose. +It is provided "as is" without expressed or implied warranty. +*/ +package org.apache.mahout.math.list; + +import java.util.Collection; + +/** + Abstract base class for resizable lists holding objects or primitive data types such as <code>int</code>, + <code>float</code>, etc.First see the <a href="package-summary.html">package summary</a> and + javadoc <a href="package-tree.html">tree view</a> to get the broad picture. + <p> + <b>Note that this implementation is not synchronized.</b> + + @author [email protected] + @version 1.0, 09/24/99 + @see java.util.ArrayList + @see java.util.Vector + @see java.util.Arrays + */ +public abstract class AbstractObjectList<T> extends AbstractList { + + /** + * Appends all of the elements of the specified Collection to the receiver. + * + * @throws ClassCastException if an element in the collection is not of the same parameter type of the receiver. + */ + public void addAllOf(Collection<T> collection) { + this.beforeInsertAllOf(size(), collection); + } + + /** + * Inserts all elements of the specified collection before the specified position into the receiver. Shifts the + * element currently at that position (if any) and any subsequent elements to the right (increases their indices). + * + * @param index index before which to insert first element from the specified collection. + * @param collection the collection to be inserted + * @throws ClassCastException if an element in the collection is not of the same parameter type of the + * receiver. + * @throws IndexOutOfBoundsException if <tt>index < 0 || index > size()</tt>. + */ + public void beforeInsertAllOf(int index, Collection<T> collection) { + this.beforeInsertDummies(index, collection.size()); + this.replaceFromWith(index, collection); + } + + /** + * Replaces the part of the receiver starting at <code>from</code> (inclusive) with all the elements of the specified + * collection. Does not alter the size of the receiver. Replaces exactly <tt>Math.max(0,Math.min(size()-from, + * other.size()))</tt> elements. + * + * @param from the index at which to copy the first element from the specified collection. + * @param other Collection to replace part of the receiver + * @throws IndexOutOfBoundsException if <tt>index < 0 || index >= size()</tt>. + */ + public abstract void replaceFromWith(int from, Collection<T> other); +} http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/list/ObjectArrayList.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/list/ObjectArrayList.java b/core/src/main/java/org/apache/mahout/math/list/ObjectArrayList.java new file mode 100644 index 0000000..c41141f --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/list/ObjectArrayList.java @@ -0,0 +1,419 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* +Copyright � 1999 CERN - European Organization for Nuclear Research. +Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose +is hereby granted without fee, provided that the above copyright notice appear in all copies and +that both that copyright notice and this permission notice appear in supporting documentation. +CERN makes no representations about the suitability of this software for any purpose. +It is provided "as is" without expressed or implied warranty. +*/ +package org.apache.mahout.math.list; + +import org.apache.mahout.math.function.ObjectProcedure; + +import java.util.Collection; + +/** + Resizable list holding <code>${valueType}</code> elements; implemented with arrays. +*/ + +public class ObjectArrayList<T> extends AbstractObjectList<T> { + + /** + * The array buffer into which the elements of the list are stored. The capacity of the list is the length of this + * array buffer. + */ + private Object[] elements; + private int size; + + /** Constructs an empty list. */ + public ObjectArrayList() { + this(10); + } + + /** + * Constructs a list containing the specified elements. The initial size and capacity of the list is the length of the + * array. + * + * <b>WARNING:</b> For efficiency reasons and to keep memory usage low, <b>the array is not copied</b>. So if + * subsequently you modify the specified array directly via the [] operator, be sure you know what you're doing. + * + * @param elements the array to be backed by the the constructed list + */ + public ObjectArrayList(T[] elements) { + elements(elements); + } + + /** + * Constructs an empty list with the specified initial capacity. + * + * @param initialCapacity the number of elements the receiver can hold without auto-expanding itself by allocating new + * internal memory. + */ + @SuppressWarnings("unchecked") + public ObjectArrayList(int initialCapacity) { + elements = new Object[initialCapacity]; + size = 0; + } + + /** + * Appends the specified element to the end of this list. + * + * @param element element to be appended to this list. + */ + public void add(T element) { + // overridden for performance only. + if (size == elements.length) { + ensureCapacity(size + 1); + } + elements[size++] = element; + } + + /** + * Inserts the specified element before the specified position into the receiver. Shifts the element currently at that + * position (if any) and any subsequent elements to the right. + * + * @param index index before which the specified element is to be inserted (must be in [0,size]). + * @param element element to be inserted. + * @throws IndexOutOfBoundsException index is out of range (<tt>index < 0 || index > size()</tt>). + */ + public void beforeInsert(int index, T element) { + // overridden for performance only. + if (size == index) { + add(element); + return; + } + if (index > size || index < 0) { + throw new IndexOutOfBoundsException("Index: " + index + ", Size: " + size); + } + ensureCapacity(size + 1); + System.arraycopy(elements, index, elements, index + 1, size - index); + elements[index] = element; + size++; + } + + + /** + * Returns a deep copy of the receiver. + * + * @return a deep copy of the receiver. + */ + @SuppressWarnings("unchecked") + @Override + public Object clone() { + // overridden for performance only. + return new ObjectArrayList<>((T[]) elements.clone()); + } + + /** + * Returns a deep copy of the receiver; uses <code>clone()</code> and casts the result. + * + * @return a deep copy of the receiver. + */ + @SuppressWarnings("unchecked") + public ObjectArrayList<T> copy() { + return (ObjectArrayList<T>) clone(); + } + + /** + * Returns the elements currently stored, including invalid elements between size and capacity, if any. + * + * <b>WARNING:</b> For efficiency reasons and to keep memory usage low, <b>the array is not copied</b>. So if + * subsequently you modify the returned array directly via the [] operator, be sure you know what you're doing. + * + * @return the elements currently stored. + */ + @SuppressWarnings("unchecked") + public <Q> Q[] elements() { + return (Q[])elements; + } + + /** + * Sets the receiver's elements to be the specified array (not a copy of it). + * + * The size and capacity of the list is the length of the array. <b>WARNING:</b> For efficiency reasons and to keep + * memory usage low, <b>the array is not copied</b>. So if subsequently you modify the specified array directly via + * the [] operator, be sure you know what you're doing. + * + * @param elements the new elements to be stored. + */ + public void elements(T[] elements) { + this.elements = elements; + this.size = elements.length; + } + + /** + * Ensures that the receiver can hold at least the specified number of elements without needing to allocate new + * internal memory. If necessary, allocates new internal memory and increases the capacity of the receiver. + * + * @param minCapacity the desired minimum capacity. + */ + public void ensureCapacity(int minCapacity) { + elements = org.apache.mahout.math.Arrays.ensureCapacity(elements, minCapacity); + } + + /** + * Compares the specified Object with the receiver. Returns true if and only if the specified Object is also an + * ArrayList of the same type, both Lists have the same size, and all corresponding pairs of elements in the two Lists + * are identical. In other words, two Lists are defined to be equal if they contain the same elements in the same + * order. + * + * @param otherObj the Object to be compared for equality with the receiver. + * @return true if the specified Object is equal to the receiver. + */ + @Override + @SuppressWarnings("unchecked") + public boolean equals(Object otherObj) { //delta + // overridden for performance only. + if (!(otherObj instanceof ObjectArrayList)) { + return super.equals(otherObj); + } + if (this == otherObj) { + return true; + } + if (otherObj == null) { + return false; + } + ObjectArrayList<?> other = (ObjectArrayList<?>) otherObj; + if (size() != other.size()) { + return false; + } + + Object[] theElements = elements(); + Object[] otherElements = other.elements(); + for (int i = size(); --i >= 0;) { + if (theElements[i] != otherElements[i]) { + return false; + } + } + return true; + } + + /** + * Applies a procedure to each element of the receiver, if any. Starts at index 0, moving rightwards. + * + * @param procedure the procedure to be applied. Stops iteration if the procedure returns <tt>false</tt>, otherwise + * continues. + * @return <tt>false</tt> if the procedure stopped before all elements where iterated over, <tt>true</tt> otherwise. + */ + @SuppressWarnings("unchecked") + public boolean forEach(ObjectProcedure<T> procedure) { + T[] theElements = (T[]) elements; + int theSize = size; + + for (int i = 0; i < theSize;) { + if (!procedure.apply(theElements[i++])) { + return false; + } + } + return true; + } + + /** + * Returns the element at the specified position in the receiver. + * + * @param index index of element to return. + * @throws IndexOutOfBoundsException index is out of range (index < 0 || index >= size()). + */ + @SuppressWarnings("unchecked") + public T get(int index) { + // overridden for performance only. + if (index >= size || index < 0) { + throw new IndexOutOfBoundsException("Index: " + index + ", Size: " + size); + } + return (T) elements[index]; + } + + /** + * Returns the element at the specified position in the receiver; <b>WARNING:</b> Does not check preconditions. + * Provided with invalid parameters this method may return invalid elements without throwing any exception! <b>You + * should only use this method when you are absolutely sure that the index is within bounds.</b> Precondition + * (unchecked): <tt>index >= 0 && index < size()</tt>. + * + * @param index index of element to return. + */ + @SuppressWarnings("unchecked") + public T getQuick(int index) { + return (T) elements[index]; + } + + /** + * Returns the index of the first occurrence of the specified element. Returns <code>-1</code> if the receiver does + * not contain this element. Searches between <code>from</code>, inclusive and <code>to</code>, inclusive. Tests for + * identity. + * + * @param element element to search for. + * @param from the leftmost search position, inclusive. + * @param to the rightmost search position, inclusive. + * @return the index of the first occurrence of the element in the receiver; returns <code>-1</code> if the element is + * not found. + * @throws IndexOutOfBoundsException index is out of range (<tt>size()>0 && (from<0 || from>to || + * to>=size())</tt>). + */ + public int indexOfFromTo(T element, int from, int to) { + // overridden for performance only. + if (size == 0) { + return -1; + } + checkRangeFromTo(from, to, size); + + Object[] theElements = elements; + for (int i = from; i <= to; i++) { + if (element == theElements[i]) { + return i; + } //found + } + return -1; //not found + } + + /** + * Returns the index of the last occurrence of the specified element. Returns <code>-1</code> if the receiver does not + * contain this element. Searches beginning at <code>to</code>, inclusive until <code>from</code>, inclusive. Tests + * for identity. + * + * @param element element to search for. + * @param from the leftmost search position, inclusive. + * @param to the rightmost search position, inclusive. + * @return the index of the last occurrence of the element in the receiver; returns <code>-1</code> if the element is + * not found. + * @throws IndexOutOfBoundsException index is out of range (<tt>size()>0 && (from<0 || from>to || + * to>=size())</tt>). + */ + public int lastIndexOfFromTo(T element, int from, int to) { + // overridden for performance only. + if (size == 0) { + return -1; + } + checkRangeFromTo(from, to, size); + + Object[] theElements = elements; + for (int i = to; i >= from; i--) { + if (element == theElements[i]) { + return i; + } //found + } + return -1; //not found + } + + /** + * Returns a new list of the part of the receiver between <code>from</code>, inclusive, and <code>to</code>, + * inclusive. + * + * @param from the index of the first element (inclusive). + * @param to the index of the last element (inclusive). + * @return a new list + * @throws IndexOutOfBoundsException index is out of range (<tt>size()>0 && (from<0 || from>to || + * to>=size())</tt>). + */ + @SuppressWarnings("unchecked") + public AbstractObjectList<T> partFromTo(int from, int to) { + if (size == 0) { + return new ObjectArrayList<>(0); + } + + checkRangeFromTo(from, to, size); + + Object[] part = new Object[to - from + 1]; + System.arraycopy(elements, from, part, 0, to - from + 1); + return new ObjectArrayList<>((T[]) part); + } + + /** Reverses the elements of the receiver. Last becomes first, second last becomes second first, and so on. */ + @Override + public void reverse() { + // overridden for performance only. + int limit = size / 2; + int j = size - 1; + + Object[] theElements = elements; + for (int i = 0; i < limit;) { //swap + Object tmp = theElements[i]; + theElements[i++] = theElements[j]; + theElements[j--] = tmp; + } + } + + /** + * Replaces the element at the specified position in the receiver with the specified element. + * + * @param index index of element to replace. + * @param element element to be stored at the specified position. + * @throws IndexOutOfBoundsException index is out of range (index < 0 || index >= size()). + */ + public void set(int index, T element) { + // overridden for performance only. + if (index >= size || index < 0) { + throw new IndexOutOfBoundsException("Index: " + index + ", Size: " + size); + } + elements[index] = element; + } + + /** + * Replaces the element at the specified position in the receiver with the specified element; <b>WARNING:</b> Does not + * check preconditions. Provided with invalid parameters this method may access invalid indexes without throwing any + * exception! <b>You should only use this method when you are absolutely sure that the index is within bounds.</b> + * Precondition (unchecked): {@code index >= 0 && index < size()}. + * + * @param index index of element to replace. + * @param element element to be stored at the specified position. + */ + public void setQuick(int index, T element) { + elements[index] = element; + } + + /** + * Trims the capacity of the receiver to be the receiver's current size. Releases any superfluous internal memory. An + * application can use this operation to minimize the storage of the receiver. + */ + @Override + public void trimToSize() { + elements = org.apache.mahout.math.Arrays.trimToCapacity(elements, size()); + } + + @Override + public void removeFromTo(int fromIndex, int toIndex) { + throw new UnsupportedOperationException(); + } + + @Override + public void replaceFromWith(int from, Collection<T> other) { + throw new UnsupportedOperationException(); + } + + @Override + protected void beforeInsertDummies(int index, int length) { + throw new UnsupportedOperationException(); + } + + @Override + public void mergeSortFromTo(int from, int to) { + throw new UnsupportedOperationException(); + } + + @Override + public void quickSortFromTo(int from, int to) { + throw new UnsupportedOperationException(); + } + + @Override + public int size() { + return size; + } +} http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/list/SimpleLongArrayList.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/list/SimpleLongArrayList.java b/core/src/main/java/org/apache/mahout/math/list/SimpleLongArrayList.java new file mode 100644 index 0000000..1a765eb --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/list/SimpleLongArrayList.java @@ -0,0 +1,102 @@ +/* +Copyright 1999 CERN - European Organization for Nuclear Research. +Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose +is hereby granted without fee, provided that the above copyright notice appear in all copies and +that both that copyright notice and this permission notice appear in supporting documentation. +CERN makes no representations about the suitability of this software for any purpose. +It is provided "as is" without expressed or implied warranty. +*/ +package org.apache.mahout.math.list; + +/** + Resizable list holding <code>long</code> elements; implemented with arrays; not efficient; just to + demonstrate which methods you must override to implement a fully functional list. + */ +public class SimpleLongArrayList extends AbstractLongList { + + /** + * The array buffer into which the elements of the list are stored. The capacity of the list is the length of this + * array buffer. + */ + private long[] elements; + + /** Constructs an empty list. */ + public SimpleLongArrayList() { + this(10); + } + + /** + * Constructs a list containing the specified elements. The initial size and capacity of the list is the length of the + * array. + * + * <b>WARNING:</b> For efficiency reasons and to keep memory usage low, <b>the array is not copied</b>. So if + * subsequently you modify the specified array directly via the [] operator, be sure you know what you're doing. + * + * @param elements the array to be backed by the the constructed list + */ + public SimpleLongArrayList(long[] elements) { + elements(elements); + } + + /** + * Constructs an empty list with the specified initial capacity. + * + * @param initialCapacity the number of elements the receiver can hold without auto-expanding itself by allocating new + * internal memory. + */ + private SimpleLongArrayList(int initialCapacity) { + if (initialCapacity < 0) { + throw new IllegalArgumentException("Illegal Capacity: " + initialCapacity); + } + + this.elements(new long[initialCapacity]); + size = 0; + } + + /** + * Ensures that the receiver can hold at least the specified number of elements without needing to allocate new + * internal memory. If necessary, allocates new internal memory and increases the capacity of the receiver. + * + * @param minCapacity the desired minimum capacity. + */ + @Override + public void ensureCapacity(int minCapacity) { + elements = org.apache.mahout.math.Arrays.ensureCapacity(elements, minCapacity); + } + + /** + * Returns the element at the specified position in the receiver; <b>WARNING:</b> Does not check preconditions. + * Provided with invalid parameters this method may return invalid elements without throwing any exception! <b>You + * should only use this method when you are absolutely sure that the index is within bounds.</b> Precondition + * (unchecked): <tt>index >= 0 && index < size()</tt>. + * + * @param index index of element to return. + */ + @Override + protected long getQuick(int index) { + return elements[index]; + } + + /** + * Replaces the element at the specified position in the receiver with the specified element; <b>WARNING:</b> Does not + * check preconditions. Provided with invalid parameters this method may access invalid indexes without throwing any + * exception! <b>You should only use this method when you are absolutely sure that the index is within bounds.</b> + * Precondition (unchecked): <tt>index >= 0 && index < size()</tt>. + * + * @param index index of element to replace. + * @param element element to be stored at the specified position. + */ + @Override + protected void setQuick(int index, long element) { + elements[index] = element; + } + + /** + * Trims the capacity of the receiver to be the receiver's current size. An application can use this operation to + * minimize the storage of the receiver. + */ + @Override + public void trimToSize() { + elements = org.apache.mahout.math.Arrays.trimToCapacity(elements, size()); + } +}
