[05/15] mahout git commit: NO-JIRA Trevors updates

rawkintrevo Sat, 08 Sep 2018 16:35:15 -0700

http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/jet/random/sampling/RandomSampler.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/mahout/math/jet/random/sampling/RandomSampler.java
 
b/core/src/main/java/org/apache/mahout/math/jet/random/sampling/RandomSampler.java
new file mode 100644
index 0000000..6804547
--- /dev/null
+++ 
b/core/src/main/java/org/apache/mahout/math/jet/random/sampling/RandomSampler.java
@@ -0,0 +1,503 @@
+/*
+Copyright ï¿½ 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its 
documentation for any purpose 
+is hereby granted without fee, provided that the above copyright notice appear 
in all copies and 
+that both that copyright notice and this permission notice appear in 
supporting documentation. 
+CERN makes no representations about the suitability of this software for any 
purpose. 
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.math.jet.random.sampling;
+
+import org.apache.mahout.common.RandomUtils;
+
+import java.util.Random;
+
+/**
+ * Space and time efficiently computes a sorted <i>Simple Random Sample 
Without Replacement
+ * (SRSWOR)</i>, that is, a sorted set of <tt>n</tt> random numbers from an 
interval of <tt>N</tt> numbers;
+ * Example: Computing <tt>n=3</tt> random numbers from the interval 
<tt>[1,50]</tt> may yield
+ * the sorted random set <tt>(7,13,47)</tt>.
+ * Since we are talking about a set (sampling without replacement), no element 
will occur more than once.
+ * Each number from the <tt>N</tt> numbers has the same probability to be 
included in the <tt>n</tt> chosen numbers.
+ *
+ * <p><b>Problem:</b> This class solves problems including the following: <i>
+ * Suppose we have a file containing 10^12 objects.
+ * We would like to take a truly random subset of 10^6 objects and do 
something with it, 
+ * for example, compute the sum over some instance field, or whatever.
+ * How do we choose the subset? In particular, how do we avoid multiple equal 
elements?
+ * How do we do this quick and without consuming excessive memory?
+ * How do we avoid slowly jumping back and forth within the file? </i>
+ *
+ * <p><b>Sorted Simple Random Sample Without Replacement (SRSWOR):</b>
+ * What are the exact semantics of this class? What is a SRSWOR? In which 
sense exactly is a returned set "random"?
+ * It is random in the sense, that each number from the <tt>N</tt> numbers has 
the
+ * same probability to be included in the <tt>n</tt> chosen numbers.
+ * For those who think in implementations rather than abstract interfaces:
+ * <i>Suppose, we have an empty list.
+ * We pick a random number between 1 and 10^12 and add it to the list only if 
it was not
+ * already picked before, i.e. if it is not already contained in the list.
+ * We then do the same thing again and again until we have eventually 
collected 10^6 distinct numbers.
+ * Now we sort the set ascending and return it.</i>
+ * <dl>
+ * <dt>It is exactly in this sense that this class returns "random" sets.
+ * <b>Note, however, that the implementation of this class uses a technique 
orders of magnitudes
+ * better (both in time and space) than the one outlined above.</b></dt></dl>
+ *
+ * <p><b>Performance:</b> Space requirements are zero. Running time is 
<tt>O(n)</tt> on average,
+ * <tt>O(N)</tt> in the worst case.
+ * <h2>Performance (200Mhz Pentium Pro, JDK 1.2, NT)</h2>
+ * <center>
+ *   <table border="1" summary="performance table">
+ *     <tr> 
+ *       <td align="center" width="20%">n</td>
+ *       <td align="center" width="20%">N</td>
+ *       <td align="center" width="20%">Speed [seconds]</td>
+ *     </tr>
+ *     <tr> 
+ *       <td align="center" width="20%">10<sup>3</sup></td>
+ *       <td align="center" width="20%">1.2*10<sup>3</sup></td>
+ *       <td align="center" width="20">0.0014</td>
+ *     </tr>
+ *     <tr> 
+ *       <td align="center" width="20%">10<sup>3</sup></td>
+ *       <td align="center" width="20%">10<sup>7</sup></td>
+ *       <td align="center" width="20">0.006</td>
+ *     </tr>
+ *     <tr> 
+ *       <td align="center" width="20%">10<sup>5</sup></td>
+ *       <td align="center" width="20%">10<sup>7</sup></td>
+ *       <td align="center" width="20">0.7</td>
+ *     </tr>
+ *     <tr> 
+ *       <td align="center" width="20%">9.0*10<sup>6</sup></td>
+ *       <td align="center" width="20%">10<sup>7</sup></td>
+ *       <td align="center" width="20">8.5</td>
+ *     </tr>
+ *     <tr> 
+ *       <td align="center" width="20%">9.9*10<sup>6</sup></td>
+ *       <td align="center" width="20%">10<sup>7</sup></td>
+ *       <td align="center" width="20">2.0 (samples more than 95%)</td>
+ *     </tr>
+ *     <tr> 
+ *       <td align="center" width="20%">10<sup>4</sup></td>
+ *       <td align="center" width="20%">10<sup>12</sup></td>
+ *       <td align="center" width="20">0.07</td>
+ *     </tr>
+ *     <tr> 
+ *       <td align="center" width="20%">10<sup>7</sup></td>
+ *       <td align="center" width="20%">10<sup>12</sup></td>
+ *       <td align="center" width="20">60</td>
+ *     </tr>
+ *   </table>
+ * </center>
+ *
+ * <p><b>Scalability:</b> This random sampler is designed to be scalable. In 
iterator style,
+ * it is able to compute and deliver sorted random sets stepwise in units 
called <i>blocks</i>.
+ * Example: Computing <tt>n=9</tt> random numbers from the interval 
<tt>[1,50]</tt> in
+ * 3 blocks may yield the blocks <tt>(7,13,14), (27,37,42), (45,46,49)</tt>.
+ * (The maximum of a block is guaranteed to be less than the minimum of its 
successor block.
+ * Every block is sorted ascending. No element will ever occur twice, both 
within a block and among blocks.)
+ * A block can be computed and retrieved with method <tt>nextBlock</tt>.
+ * Successive calls to method <tt>nextBlock</tt> will deliver as many random 
numbers as required.
+ *
+ * <p>Computing and retrieving samples in blocks is useful if you need very 
many random
+ * numbers that cannot be stored in main memory at the same time.
+ * For example, if you want to compute 10^10 such numbers you can do this by 
computing
+ * them in blocks of, say, 500 elements each.
+ * You then need only space to keep one block of 500 elements (i.e. 4 KB).
+ * When you are finished processing the first 500 elements you call 
<tt>nextBlock</tt> to
+ * fill the next 500 elements into the block, process them, and so on.
+ * If you have the time and need, by using such blocks you can compute random 
sets
+ * up to <tt>n=10^19</tt> random numbers.
+ *
+ * <p>If you do not need the block feature, you can also directly call 
+ * the static methods of this class without needing to construct a 
<tt>RandomSampler</tt> instance first.
+ *
+ * <p><b>Random number generation:</b> By default uses 
<tt>MersenneTwister</tt>, a very
+ * strong random number generator, much better than <tt>java.util.Random</tt>.
+ * You can also use other strong random number generators of Paul Houle's 
RngPack package.
+ * For example, <tt>Ranecu</tt>, <tt>Ranmar</tt> and <tt>Ranlux</tt> are 
strong well
+ * analyzed research grade pseudo-random number generators with known periods.
+ *
+ * <p><b>Implementation:</b> after J.S. Vitter, An Efficient Algorithm for 
Sequential Random Sampling,
+ * ACM Transactions on Mathematical Software, Vol 13, 1987.
+ * Paper available <A HREF="http://www.cs.duke.edu/~jsv";> here</A>.
+ */
+public final class RandomSampler {
+
+  private RandomSampler() {
+  }
+
+  /**
+   * Efficiently computes a sorted random set of <tt>count</tt> elements from 
the interval <tt>[low,low+N-1]</tt>. Since
+   * we are talking about a random set, no element will occur more than once.
+   *
+   * <p>Running time is <tt>O(count)</tt>, on average. Space requirements are 
zero.
+   *
+   * <p>Numbers are filled into the specified array starting at index 
<tt>fromIndex</tt> to the right. The array is
+   * returned sorted ascending in the range filled with numbers.
+   *
+   * @param n               the total number of elements to choose (must be 
&gt;= 0).
+   * @param N               the interval to choose random numbers from is 
<tt>[low,low+N-1]</tt>.
+   * @param count           the number of elements to be filled into 
<tt>values</tt> by this call (must be &gt;= 0 and
+   *                        &lt;=<tt>n</tt>). Normally, you will set 
<tt>count=n</tt>.
+   * @param low             the interval to choose random numbers from is 
<tt>[low,low+N-1]</tt>. Hint: If
+   *                        <tt>low==0</tt>, then draws random numbers from 
the interval <tt>[0,N-1]</tt>.
+   * @param values          the array into which the random numbers are to be 
filled; must have a length <tt>&gt;=
+   *                        count+fromIndex</tt>.
+   * @param fromIndex       the first index within <tt>values</tt> to be 
filled with numbers (inclusive).
+   * @param randomGenerator a random number generator.
+   */
+  private static void rejectMethodD(long n, long N, int count, long low, 
long[] values, int fromIndex,
+                                    Random randomGenerator) {
+    /*  This algorithm is applicable if a large percentage (90%..100%) of N 
shall be sampled.
+      In such cases it is more efficient than sampleMethodA() and 
sampleMethodD().
+        The idea is that it is more efficient to express
+      sample(n,N,count) in terms of reject(N-n,N,count)
+       and then invert the result.
+      For example, sampling 99% turns into sampling 1% plus inversion.
+
+      This algorithm is the same as method sampleMethodD(...) with the 
exception that sampled elements are rejected,
+      and not sampled elements included in the result set.
+    */
+    n = N - n; // IMPORTANT !!!
+
+    //long threshold;
+    long chosen = -1 + low;
+
+    //long negalphainv =
+    //    -13;  //tuning paramter, determines when to switch from method D to 
method A. Dependent on programming
+    // language, platform, etc.
+
+    double nreal = n;
+    double ninv = 1.0 / nreal;
+    double Nreal = N;
+    double Vprime = Math.exp(Math.log(randomGenerator.nextDouble()) * ninv);
+    long qu1 = -n + 1 + N;
+    double qu1real = -nreal + 1.0 + Nreal;
+    //threshold = -negalphainv * n;
+
+    long S;
+    while (n > 1 && count > 0) { //&& threshold<N) {
+      double nmin1inv = 1.0 / (-1.0 + nreal);
+      double negSreal;
+      while (true) {
+        double X;
+        while (true) { // step D2: generate U and X
+          X = Nreal * (-Vprime + 1.0);
+          S = (long) X;
+          if (S < qu1) {
+            break;
+          }
+          Vprime = Math.exp(Math.log(randomGenerator.nextDouble()) * ninv);
+        }
+        double U = randomGenerator.nextDouble();
+        negSreal = -S;
+
+        //step D3: Accept?
+        double y1 = Math.exp(Math.log(U * Nreal / qu1real) * nmin1inv);
+        Vprime = y1 * (-X / Nreal + 1.0) * qu1real / (negSreal + qu1real);
+        if (Vprime <= 1.0) {
+          break;
+        } //break inner loop
+
+        //step D4: Accept?
+        double top = -1.0 + Nreal;
+        long limit;
+        double bottom;
+        if (n - 1 > S) {
+          bottom = -nreal + Nreal;
+          limit = -S + N;
+        } else {
+          bottom = -1.0 + negSreal + Nreal;
+          limit = qu1;
+        }
+        double y2 = 1.0;
+        for (long t = N - 1; t >= limit; t--) {
+          y2 *= top / bottom;
+          top--;
+          bottom--;
+        }
+        if (Nreal / (-X + Nreal) >= y1 * Math.exp(Math.log(y2) * nmin1inv)) {
+          // accept !
+          Vprime = Math.exp(Math.log(randomGenerator.nextDouble()) * nmin1inv);
+          break; //break inner loop
+        }
+        Vprime = Math.exp(Math.log(randomGenerator.nextDouble()) * ninv);
+      }
+
+      //step D5: reject the (S+1)st record !
+      int iter = count; //int iter = (int) (Math.min(S,count));
+      if (S < iter) {
+        iter = (int) S;
+      }
+
+      count -= iter;
+      while (--iter >= 0) {
+        values[fromIndex++] = ++chosen;
+      }
+      chosen++;
+
+      N -= S + 1;
+      Nreal = negSreal - 1.0 + Nreal;
+      n--;
+      nreal--;
+      ninv = nmin1inv;
+      qu1 = -S + qu1;
+      qu1real = negSreal + qu1real;
+      //threshold += negalphainv;
+    } //end while
+
+
+    if (count > 0) { //special case n==1
+      //reject the (S+1)st record !
+      S = (long) (N * Vprime);
+
+      int iter = count; //int iter = (int) (Math.min(S,count));
+      if (S < iter) {
+        iter = (int) S;
+      }
+
+      count -= iter;
+      while (--iter >= 0) {
+        values[fromIndex++] = ++chosen;
+      }
+
+      chosen++;
+
+      // fill the rest
+      while (--count >= 0) {
+        values[fromIndex++] = ++chosen;
+      }
+    }
+  }
+
+  /**
+   * Efficiently computes a sorted random set of <tt>count</tt> elements from 
the interval <tt>[low,low+N-1]</tt>. Since
+   * we are talking about a random set, no element will occur more than once.
+   *
+   * <p>Running time is <tt>O(count)</tt>, on average. Space requirements are 
zero.
+   *
+   * <p>Numbers are filled into the specified array starting at index 
<tt>fromIndex</tt> to the right. The array is
+   * returned sorted ascending in the range filled with numbers.
+   *
+   * <p><b>Random number generation:</b> By default uses 
<tt>MersenneTwister</tt>, a very strong random number
+   * generator, much better than <tt>java.util.Random</tt>. You can also use 
other strong random number generators of
+   * Paul Houle's RngPack package. For example, <tt>Ranecu</tt>, 
<tt>Ranmar</tt> and <tt>Ranlux</tt> are strong well
+   * analyzed research grade pseudo-random number generators with known 
periods.
+   *
+   * @param n               the total number of elements to choose (must be 
<tt>n &gt;= 0</tt> and <tt>n &lt;= N</tt>).
+   * @param N               the interval to choose random numbers from is 
<tt>[low,low+N-1]</tt>.
+   * @param count           the number of elements to be filled into 
<tt>values</tt> by this call (must be &gt;= 0 and
+   *                        &lt;=<tt>n</tt>). Normally, you will set 
<tt>count=n</tt>.
+   * @param low             the interval to choose random numbers from is 
<tt>[low,low+N-1]</tt>. Hint: If
+   *                        <tt>low==0</tt>, then draws random numbers from 
the interval <tt>[0,N-1]</tt>.
+   * @param values          the array into which the random numbers are to be 
filled; must have a length <tt>&gt;=
+   *                        count+fromIndex</tt>.
+   * @param fromIndex       the first index within <tt>values</tt> to be 
filled with numbers (inclusive).
+   * @param randomGenerator a random number generator. Set this parameter to 
<tt>null</tt> to use the default random
+   *                        number generator.
+   */
+  public static void sample(long n, long N, int count, long low, long[] 
values, int fromIndex,
+                            Random randomGenerator) {
+    if (n <= 0 || count <= 0) {
+      return;
+    }
+    if (count > n) {
+      throw new IllegalArgumentException("count must not be greater than n");
+    }
+    if (randomGenerator == null) {
+      randomGenerator = RandomUtils.getRandom();
+    }
+
+    if (count == N) { // rare case treated quickly
+      long val = low;
+      int limit = fromIndex + count;
+      for (int i = fromIndex; i < limit; i++) {
+        values[i] = val++;
+      }
+      return;
+    }
+
+    if (n < N * 0.95) { // || Math.min(count,N-n)>maxTmpMemoryAllowed) {
+      sampleMethodD(n, N, count, low, values, fromIndex, randomGenerator);
+    } else { // More than 95% of all numbers shall be sampled.
+      rejectMethodD(n, N, count, low, values, fromIndex, randomGenerator);
+    }
+
+
+  }
+
+  /**
+   * Computes a sorted random set of <tt>count</tt> elements from the interval 
<tt>[low,low+N-1]</tt>. Since we are
+   * talking about a random set, no element will occur more than once.
+   *
+   * <p>Running time is <tt>O(N)</tt>, on average. Space requirements are zero.
+   *
+   * <p>Numbers are filled into the specified array starting at index 
<tt>fromIndex</tt> to the right. The array is
+   * returned sorted ascending in the range filled with numbers.
+   *
+   * @param n               the total number of elements to choose (must be 
&gt;= 0).
+   * @param N               the interval to choose random numbers from is 
<tt>[low,low+N-1]</tt>.
+   * @param count           the number of elements to be filled into 
<tt>values</tt> by this call (must be &gt;= 0 and
+   *                        &lt;=<tt>n</tt>). Normally, you will set 
<tt>count=n</tt>.
+   * @param low             the interval to choose random numbers from is 
<tt>[low,low+N-1]</tt>. Hint: If
+   *                        <tt>low==0</tt>, then draws random numbers from 
the interval <tt>[0,N-1]</tt>.
+   * @param values          the array into which the random numbers are to be 
filled; must have a length <tt>&gt;=
+   *                        count+fromIndex</tt>.
+   * @param fromIndex       the first index within <tt>values</tt> to be 
filled with numbers (inclusive).
+   * @param randomGenerator a random number generator.
+   */
+  private static void sampleMethodA(long n, long N, int count, long low, 
long[] values, int fromIndex,
+                                    Random randomGenerator) {
+    long chosen = -1 + low;
+
+    double top = N - n;
+    double Nreal = N;
+    long S;
+    while (n >= 2 && count > 0) {
+      double V = randomGenerator.nextDouble();
+      S = 0;
+      double quot = top / Nreal;
+      while (quot > V) {
+        S++;
+        top--;
+        Nreal--;
+        quot *= top / Nreal;
+      }
+      chosen += S + 1;
+      values[fromIndex++] = chosen;
+      count--;
+      Nreal--;
+      n--;
+    }
+
+    if (count > 0) {
+      // special case n==1
+      S = (long) (Math.round(Nreal) * randomGenerator.nextDouble());
+      chosen += S + 1;
+      values[fromIndex] = chosen;
+    }
+  }
+
+  /**
+   * Efficiently computes a sorted random set of <tt>count</tt> elements from 
the interval <tt>[low,low+N-1]</tt>. Since
+   * we are talking about a random set, no element will occur more than once.
+   *
+   * <p>Running time is <tt>O(count)</tt>, on average. Space requirements are 
zero.
+   *
+   * <p>Numbers are filled into the specified array starting at index 
<tt>fromIndex</tt> to the right. The array is
+   * returned sorted ascending in the range filled with numbers.
+   *
+   * @param n               the total number of elements to choose (must be 
&gt;= 0).
+   * @param N               the interval to choose random numbers from is 
<tt>[low,low+N-1]</tt>.
+   * @param count           the number of elements to be filled into 
<tt>values</tt> by this call (must be &gt;= 0 and
+   *                        &lt;=<tt>n</tt>). Normally, you will set 
<tt>count=n</tt>.
+   * @param low             the interval to choose random numbers from is 
<tt>[low,low+N-1]</tt>. Hint: If
+   *                        <tt>low==0</tt>, then draws random numbers from 
the interval <tt>[0,N-1]</tt>.
+   * @param values          the array into which the random numbers are to be 
filled; must have a length <tt>&gt;=
+   *                        count+fromIndex</tt>.
+   * @param fromIndex       the first index within <tt>values</tt> to be 
filled with numbers (inclusive).
+   * @param randomGenerator a random number generator.
+   */
+  private static void sampleMethodD(long n, long N, int count, long low, 
long[] values, int fromIndex,
+                                    Random randomGenerator) {
+    long chosen = -1 + low;
+
+    double nreal = n;
+    double ninv = 1.0 / nreal;
+    double Nreal = N;
+    double vprime = Math.exp(Math.log(randomGenerator.nextDouble()) * ninv);
+    long qu1 = -n + 1 + N;
+    double qu1real = -nreal + 1.0 + Nreal;
+    long negalphainv = -13;
+    //tuning paramter, determines when to switch from method D to method A. 
Dependent on programming
+    // language, platform, etc.
+    long threshold = -negalphainv * n;
+
+    long S;
+    while (n > 1 && count > 0 && threshold < N) {
+      double nmin1inv = 1.0 / (-1.0 + nreal);
+      double negSreal;
+      while (true) {
+        double X;
+        while (true) { // step D2: generate U and X
+          X = Nreal * (-vprime + 1.0);
+          S = (long) X;
+          if (S < qu1) {
+            break;
+          }
+          vprime = Math.exp(Math.log(randomGenerator.nextDouble()) * ninv);
+        }
+        double U = randomGenerator.nextDouble();
+        negSreal = -S;
+
+        //step D3: Accept?
+        double y1 = Math.exp(Math.log(U * Nreal / qu1real) * nmin1inv);
+        vprime = y1 * (-X / Nreal + 1.0) * qu1real / (negSreal + qu1real);
+        if (vprime <= 1.0) {
+          break;
+        } //break inner loop
+
+        //step D4: Accept?
+        double top = -1.0 + Nreal;
+        long limit;
+        double bottom;
+        if (n - 1 > S) {
+          bottom = -nreal + Nreal;
+          limit = -S + N;
+        } else {
+          bottom = -1.0 + negSreal + Nreal;
+          limit = qu1;
+        }
+        double y2 = 1.0;
+        for (long t = N - 1; t >= limit; t--) {
+          y2 *= top / bottom;
+          top--;
+          bottom--;
+        }
+        if (Nreal / (-X + Nreal) >= y1 * Math.exp(Math.log(y2) * nmin1inv)) {
+          // accept !
+          vprime = Math.exp(Math.log(randomGenerator.nextDouble()) * nmin1inv);
+          break; //break inner loop
+        }
+        vprime = Math.exp(Math.log(randomGenerator.nextDouble()) * ninv);
+      }
+
+      //step D5: select the (S+1)st record !
+      chosen += S + 1;
+      values[fromIndex++] = chosen;
+      /*
+      // invert
+      for (int iter=0; iter<S && count > 0; iter++) {
+        values[fromIndex++] = ++chosen;
+        count--;
+      }
+      chosen++;
+      */
+      count--;
+
+      N -= S + 1;
+      Nreal = negSreal - 1.0 + Nreal;
+      n--;
+      nreal--;
+      ninv = nmin1inv;
+      qu1 = -S + qu1;
+      qu1real = negSreal + qu1real;
+      threshold += negalphainv;
+    } //end while
+
+
+    if (count > 0) {
+      if (n > 1) { //faster to use method A to finish the sampling
+        sampleMethodA(n, N, count, chosen + 1, values, fromIndex, 
randomGenerator);
+      } else {
+        //special case n==1
+        S = (long) (N * vprime);
+        chosen += S + 1;
+        values[fromIndex++] = chosen;
+      }
+    }
+  }
+
+}


http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/jet/stat/Gamma.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/mahout/math/jet/stat/Gamma.java 
b/core/src/main/java/org/apache/mahout/math/jet/stat/Gamma.java
new file mode 100644
index 0000000..3ab61a6
--- /dev/null
+++ b/core/src/main/java/org/apache/mahout/math/jet/stat/Gamma.java
@@ -0,0 +1,681 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+Copyright 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its 
documentation for any purpose 
+is hereby granted without fee, provided that the above copyright notice appear 
in all copies and 
+that both that copyright notice and this permission notice appear in 
supporting documentation. 
+CERN makes no representations about the suitability of this software for any 
purpose. 
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.math.jet.stat;
+
+import org.apache.mahout.math.jet.math.Constants;
+import org.apache.mahout.math.jet.math.Polynomial;
+
+/** Partially deprecated until unit tests are in place.  Until this time, this 
class/interface is unsupported. */
+public final class Gamma {
+
+  private static final double MAXSTIR = 143.01608;
+
+  private Gamma() {
+  }
+
+  /**
+   * Returns the beta function of the arguments.
+   * <pre>
+   *                   -     -
+   *                  | (a) | (b)
+   * beta( a, b )  =  -----------.
+   *                     -
+   *                    | (a+b)
+   * </pre>
+   * @param alpha
+   * @param beta
+   * @return The beta function for given values of alpha and beta.
+   */
+  public static double beta(double alpha, double beta) {
+    double y;
+    if (alpha < 40 && beta < 40) {
+      y = gamma(alpha + beta);
+      if (y == 0.0) {
+        return 1.0;
+      }
+
+      if (alpha > beta) {
+        y = gamma(alpha) / y;
+        y *= gamma(beta);
+      } else {
+        y = gamma(beta) / y;
+        y *= gamma(alpha);
+      }
+    } else {
+      y = Math.exp(logGamma(alpha) + logGamma(beta) - logGamma(alpha + beta));
+    }
+
+    return y;
+  }
+
+  /** Returns the Gamma function of the argument. */
+  public static double gamma(double x) {
+
+    double[] pCoefficient = {
+      1.60119522476751861407E-4,
+      1.19135147006586384913E-3,
+      1.04213797561761569935E-2,
+      4.76367800457137231464E-2,
+      2.07448227648435975150E-1,
+      4.94214826801497100753E-1,
+      9.99999999999999996796E-1
+    };
+    double[] qCoefficient = {
+      -2.31581873324120129819E-5,
+      5.39605580493303397842E-4,
+      -4.45641913851797240494E-3,
+      1.18139785222060435552E-2,
+      3.58236398605498653373E-2,
+      -2.34591795718243348568E-1,
+      7.14304917030273074085E-2,
+      1.00000000000000000320E0
+    };
+//double MAXGAM = 171.624376956302725;
+//double LOGPI  = 1.14472988584940017414;
+
+    double p;
+    double z;
+
+    double q = Math.abs(x);
+
+    if (q > 33.0) {
+      if (x < 0.0) {
+        p = Math.floor(q);
+        if (p == q) {
+          throw new ArithmeticException("gamma: overflow");
+        }
+        //int i = (int) p;
+        z = q - p;
+        if (z > 0.5) {
+          p += 1.0;
+          z = q - p;
+        }
+        z = q * Math.sin(Math.PI * z);
+        if (z == 0.0) {
+          throw new ArithmeticException("gamma: overflow");
+        }
+        z = Math.abs(z);
+        z = Math.PI / (z * stirlingFormula(q));
+
+        return -z;
+      } else {
+        return stirlingFormula(x);
+      }
+    }
+
+    z = 1.0;
+    while (x >= 3.0) {
+      x -= 1.0;
+      z *= x;
+    }
+
+    while (x < 0.0) {
+      if (x == 0.0) {
+        throw new ArithmeticException("gamma: singular");
+      }
+      if (x > -1.0e-9) {
+        return z / ((1.0 + 0.5772156649015329 * x) * x);
+      }
+      z /= x;
+      x += 1.0;
+    }
+
+    while (x < 2.0) {
+      if (x == 0.0) {
+        throw new ArithmeticException("gamma: singular");
+      }
+      if (x < 1.0e-9) {
+        return z / ((1.0 + 0.5772156649015329 * x) * x);
+      }
+      z /= x;
+      x += 1.0;
+    }
+
+    if ((x == 2.0) || (x == 3.0)) {
+      return z;
+    }
+
+    x -= 2.0;
+    p = Polynomial.polevl(x, pCoefficient, 6);
+    q = Polynomial.polevl(x, qCoefficient, 7);
+    return z * p / q;
+
+  }
+
+  /**
+   * Returns the regularized Incomplete Beta Function evaluated from zero to 
<tt>xx</tt>; formerly named <tt>ibeta</tt>.
+   *
+   * See 
http://en.wikipedia.org/wiki/Incomplete_beta_function#Incomplete_beta_function
+   *
+   * @param alpha the alpha parameter of the beta distribution.
+   * @param beta the beta parameter of the beta distribution.
+   * @param xx the integration end point.
+   */
+  public static double incompleteBeta(double alpha, double beta, double xx) {
+
+    if (alpha <= 0.0) {
+      throw new ArithmeticException("incompleteBeta: Domain error! alpha must 
be > 0, but was " + alpha);
+    }
+
+    if (beta <= 0.0) {
+      throw new ArithmeticException("incompleteBeta: Domain error! beta must 
be > 0, but was " + beta);
+    }
+
+    if (xx <= 0.0) {
+      return 0.0;
+    }
+
+    if (xx >= 1.0) {
+      return 1.0;
+    }
+
+    double t;
+    if ((beta * xx) <= 1.0 && xx <= 0.95) {
+      t = powerSeries(alpha, beta, xx);
+      return t;
+    }
+
+    double w = 1.0 - xx;
+
+    /* Reverse a and b if x is greater than the mean. */
+    double xc;
+    double x;
+    double b;
+    double a;
+    boolean flag = false;
+    if (xx > (alpha / (alpha + beta))) {
+      flag = true;
+      a = beta;
+      b = alpha;
+      xc = xx;
+      x = w;
+    } else {
+      a = alpha;
+      b = beta;
+      xc = w;
+      x = xx;
+    }
+
+    if (flag && (b * x) <= 1.0 && x <= 0.95) {
+      t = powerSeries(a, b, x);
+      t = t <= Constants.MACHEP ? 1.0 - Constants.MACHEP : 1.0 - t;
+      return t;
+    }
+
+    /* Choose expansion for better convergence. */
+    double y = x * (a + b - 2.0) - (a - 1.0);
+    w = y < 0.0 ? incompleteBetaFraction1(a, b, x) : 
incompleteBetaFraction2(a, b, x) / xc;
+
+    /* Multiply w by the factor
+       a      b   _             _     _
+      x  (1-x)   | (a+b) / ( a | (a) | (b) ) .   */
+
+    y = a * Math.log(x);
+    t = b * Math.log(xc);
+    if ((a + b) < Constants.MAXGAM && Math.abs(y) < Constants.MAXLOG && 
Math.abs(t) < Constants.MAXLOG) {
+      t = Math.pow(xc, b);
+      t *= Math.pow(x, a);
+      t /= a;
+      t *= w;
+      t *= gamma(a + b) / (gamma(a) * gamma(b));
+      if (flag) {
+        t = t <= Constants.MACHEP ? 1.0 - Constants.MACHEP : 1.0 - t;
+      }
+      return t;
+    }
+    /* Resort to logarithms.  */
+    y += t + logGamma(a + b) - logGamma(a) - logGamma(b);
+    y += Math.log(w / a);
+    t = y < Constants.MINLOG ? 0.0 : Math.exp(y);
+
+    if (flag) {
+      t = t <= Constants.MACHEP ? 1.0 - Constants.MACHEP : 1.0 - t;
+    }
+    return t;
+  }
+
+  /** Continued fraction expansion #1 for incomplete beta integral; formerly 
named <tt>incbcf</tt>. */
+  static double incompleteBetaFraction1(double a, double b, double x) {
+
+    double k1 = a;
+    double k2 = a + b;
+    double k3 = a;
+    double k4 = a + 1.0;
+    double k5 = 1.0;
+    double k6 = b - 1.0;
+    double k7 = k4;
+    double k8 = a + 2.0;
+
+    double pkm2 = 0.0;
+    double qkm2 = 1.0;
+    double pkm1 = 1.0;
+    double qkm1 = 1.0;
+    double ans = 1.0;
+    double r = 1.0;
+    int n = 0;
+    double thresh = 3.0 * Constants.MACHEP;
+    do {
+      double xk = -(x * k1 * k2) / (k3 * k4);
+      double pk = pkm1 + pkm2 * xk;
+      double qk = qkm1 + qkm2 * xk;
+      pkm2 = pkm1;
+      pkm1 = pk;
+      qkm2 = qkm1;
+      qkm1 = qk;
+
+      xk = (x * k5 * k6) / (k7 * k8);
+      pk = pkm1 + pkm2 * xk;
+      qk = qkm1 + qkm2 * xk;
+      pkm2 = pkm1;
+      pkm1 = pk;
+      qkm2 = qkm1;
+      qkm1 = qk;
+
+      if (qk != 0) {
+        r = pk / qk;
+      }
+      double t;
+      if (r != 0) {
+        t = Math.abs((ans - r) / r);
+        ans = r;
+      } else {
+        t = 1.0;
+      }
+
+      if (t < thresh) {
+        return ans;
+      }
+
+      k1 += 1.0;
+      k2 += 1.0;
+      k3 += 2.0;
+      k4 += 2.0;
+      k5 += 1.0;
+      k6 -= 1.0;
+      k7 += 2.0;
+      k8 += 2.0;
+
+      if ((Math.abs(qk) + Math.abs(pk)) > Constants.BIG) {
+        pkm2 *= Constants.BIG_INVERSE;
+        pkm1 *= Constants.BIG_INVERSE;
+        qkm2 *= Constants.BIG_INVERSE;
+        qkm1 *= Constants.BIG_INVERSE;
+      }
+      if ((Math.abs(qk) < Constants.BIG_INVERSE) || (Math.abs(pk) < 
Constants.BIG_INVERSE)) {
+        pkm2 *= Constants.BIG;
+        pkm1 *= Constants.BIG;
+        qkm2 *= Constants.BIG;
+        qkm1 *= Constants.BIG;
+      }
+    } while (++n < 300);
+
+    return ans;
+  }
+
+  /** Continued fraction expansion #2 for incomplete beta integral; formerly 
named <tt>incbd</tt>. */
+  static double incompleteBetaFraction2(double a, double b, double x) {
+
+    double k1 = a;
+    double k2 = b - 1.0;
+    double k3 = a;
+    double k4 = a + 1.0;
+    double k5 = 1.0;
+    double k6 = a + b;
+    double k7 = a + 1.0;
+    double k8 = a + 2.0;
+
+    double pkm2 = 0.0;
+    double qkm2 = 1.0;
+    double pkm1 = 1.0;
+    double qkm1 = 1.0;
+    double z = x / (1.0 - x);
+    double ans = 1.0;
+    double r = 1.0;
+    int n = 0;
+    double thresh = 3.0 * Constants.MACHEP;
+    do {
+      double xk = -(z * k1 * k2) / (k3 * k4);
+      double pk = pkm1 + pkm2 * xk;
+      double qk = qkm1 + qkm2 * xk;
+      pkm2 = pkm1;
+      pkm1 = pk;
+      qkm2 = qkm1;
+      qkm1 = qk;
+
+      xk = (z * k5 * k6) / (k7 * k8);
+      pk = pkm1 + pkm2 * xk;
+      qk = qkm1 + qkm2 * xk;
+      pkm2 = pkm1;
+      pkm1 = pk;
+      qkm2 = qkm1;
+      qkm1 = qk;
+
+      if (qk != 0) {
+        r = pk / qk;
+      }
+      double t;
+      if (r != 0) {
+        t = Math.abs((ans - r) / r);
+        ans = r;
+      } else {
+        t = 1.0;
+      }
+
+      if (t < thresh) {
+        return ans;
+      }
+
+      k1 += 1.0;
+      k2 -= 1.0;
+      k3 += 2.0;
+      k4 += 2.0;
+      k5 += 1.0;
+      k6 += 1.0;
+      k7 += 2.0;
+      k8 += 2.0;
+
+      if ((Math.abs(qk) + Math.abs(pk)) > Constants.BIG) {
+        pkm2 *= Constants.BIG_INVERSE;
+        pkm1 *= Constants.BIG_INVERSE;
+        qkm2 *= Constants.BIG_INVERSE;
+        qkm1 *= Constants.BIG_INVERSE;
+      }
+      if ((Math.abs(qk) < Constants.BIG_INVERSE) || (Math.abs(pk) < 
Constants.BIG_INVERSE)) {
+        pkm2 *= Constants.BIG;
+        pkm1 *= Constants.BIG;
+        qkm2 *= Constants.BIG;
+        qkm1 *= Constants.BIG;
+      }
+    } while (++n < 300);
+
+    return ans;
+  }
+
+  /**
+   * Returns the Incomplete Gamma function; formerly named <tt>igamma</tt>.
+   *
+   * @param alpha the shape parameter of the gamma distribution.
+   * @param x the integration end point.
+   * @return The value of the unnormalized incomplete gamma function.
+   */
+  public static double incompleteGamma(double alpha, double x) {
+    if (x <= 0 || alpha <= 0) {
+      return 0.0;
+    }
+
+    if (x > 1.0 && x > alpha) {
+      return 1.0 - incompleteGammaComplement(alpha, x);
+    }
+
+    /* Compute  x**a * exp(-x) / gamma(a)  */
+    double ax = alpha * Math.log(x) - x - logGamma(alpha);
+    if (ax < -Constants.MAXLOG) {
+      return 0.0;
+    }
+
+    ax = Math.exp(ax);
+
+    /* power series */
+    double r = alpha;
+    double c = 1.0;
+    double ans = 1.0;
+
+    do {
+      r += 1.0;
+      c *= x / r;
+      ans += c;
+    }
+    while (c / ans > Constants.MACHEP);
+
+    return ans * ax / alpha;
+
+  }
+
+  /**
+   * Returns the Complemented Incomplete Gamma function; formerly named 
<tt>igamc</tt>.
+   *
+   * @param alpha the shape parameter of the gamma distribution.
+   * @param x the integration start point.
+   */
+  public static double incompleteGammaComplement(double alpha, double x) {
+
+    if (x <= 0 || alpha <= 0) {
+      return 1.0;
+    }
+
+    if (x < 1.0 || x < alpha) {
+      return 1.0 - incompleteGamma(alpha, x);
+    }
+
+    double ax = alpha * Math.log(x) - x - logGamma(alpha);
+    if (ax < -Constants.MAXLOG) {
+      return 0.0;
+    }
+
+    ax = Math.exp(ax);
+
+    /* continued fraction */
+    double y = 1.0 - alpha;
+    double z = x + y + 1.0;
+    double c = 0.0;
+    double pkm2 = 1.0;
+    double qkm2 = x;
+    double pkm1 = x + 1.0;
+    double qkm1 = z * x;
+    double ans = pkm1 / qkm1;
+
+    double t;
+    do {
+      c += 1.0;
+      y += 1.0;
+      z += 2.0;
+      double yc = y * c;
+      double pk = pkm1 * z - pkm2 * yc;
+      double qk = qkm1 * z - qkm2 * yc;
+      if (qk != 0) {
+        double r = pk / qk;
+        t = Math.abs((ans - r) / r);
+        ans = r;
+      } else {
+        t = 1.0;
+      }
+
+      pkm2 = pkm1;
+      pkm1 = pk;
+      qkm2 = qkm1;
+      qkm1 = qk;
+      if (Math.abs(pk) > Constants.BIG) {
+        pkm2 *= Constants.BIG_INVERSE;
+        pkm1 *= Constants.BIG_INVERSE;
+        qkm2 *= Constants.BIG_INVERSE;
+        qkm1 *= Constants.BIG_INVERSE;
+      }
+    } while (t > Constants.MACHEP);
+
+    return ans * ax;
+  }
+
+  /** Returns the natural logarithm of the gamma function; formerly named 
<tt>lgamma</tt>. */
+  public static double logGamma(double x) {
+    double p;
+    double q;
+    double z;
+
+    double[] aCoefficient = {
+      8.11614167470508450300E-4,
+      -5.95061904284301438324E-4,
+      7.93650340457716943945E-4,
+      -2.77777777730099687205E-3,
+      8.33333333333331927722E-2
+    };
+    double[] bCoefficient = {
+      -1.37825152569120859100E3,
+      -3.88016315134637840924E4,
+      -3.31612992738871184744E5,
+      -1.16237097492762307383E6,
+      -1.72173700820839662146E6,
+      -8.53555664245765465627E5
+    };
+    double[] cCoefficient = {
+      /* 1.00000000000000000000E0, */
+      -3.51815701436523470549E2,
+      -1.70642106651881159223E4,
+      -2.20528590553854454839E5,
+      -1.13933444367982507207E6,
+      -2.53252307177582951285E6,
+      -2.01889141433532773231E6
+    };
+
+    if (x < -34.0) {
+      q = -x;
+      double w = logGamma(q);
+      p = Math.floor(q);
+      if (p == q) {
+        throw new ArithmeticException("lgam: Overflow");
+      }
+      z = q - p;
+      if (z > 0.5) {
+        p += 1.0;
+        z = p - q;
+      }
+      z = q * Math.sin(Math.PI * z);
+      if (z == 0.0) {
+        throw new
+            ArithmeticException("lgamma: Overflow");
+      }
+      z = Constants.LOGPI - Math.log(z) - w;
+      return z;
+    }
+
+    if (x < 13.0) {
+      z = 1.0;
+      while (x >= 3.0) {
+        x -= 1.0;
+        z *= x;
+      }
+      while (x < 2.0) {
+        if (x == 0.0) {
+          throw new ArithmeticException("lgamma: Overflow");
+        }
+        z /= x;
+        x += 1.0;
+      }
+      if (z < 0.0) {
+        z = -z;
+      }
+      if (x == 2.0) {
+        return Math.log(z);
+      }
+      x -= 2.0;
+      p = x * Polynomial.polevl(x, bCoefficient, 5) / Polynomial.p1evl(x, 
cCoefficient, 6);
+      return Math.log(z) + p;
+    }
+
+    if (x > 2.556348e305) {
+      throw new ArithmeticException("lgamma: Overflow");
+    }
+
+    q = (x - 0.5) * Math.log(x) - x + 0.91893853320467274178;
+    //if ( x > 1.0e8 ) return( q );
+    if (x > 1.0e8) {
+      return q;
+    }
+
+    p = 1.0 / (x * x);
+    if (x >= 1000.0) {
+      q += ((7.9365079365079365079365e-4 * p
+          - 2.7777777777777777777778e-3) * p
+          + 0.0833333333333333333333) / x;
+    } else {
+      q += Polynomial.polevl(p, aCoefficient, 4) / x;
+    }
+    return q;
+  }
+
+  /**
+   * Power series for incomplete beta integral; formerly named 
<tt>pseries</tt>. Use when b*x is small and x not too
+   * close to 1.
+   */
+  private static double powerSeries(double a, double b, double x) {
+
+    double ai = 1.0 / a;
+    double u = (1.0 - b) * x;
+    double v = u / (a + 1.0);
+    double t1 = v;
+    double t = u;
+    double n = 2.0;
+    double s = 0.0;
+    double z = Constants.MACHEP * ai;
+    while (Math.abs(v) > z) {
+      u = (n - b) * x / n;
+      t *= u;
+      v = t / (a + n);
+      s += v;
+      n += 1.0;
+    }
+    s += t1;
+    s += ai;
+
+    u = a * Math.log(x);
+    if ((a + b) < Constants.MAXGAM && Math.abs(u) < Constants.MAXLOG) {
+      t = gamma(a + b) / (gamma(a) * gamma(b));
+      s *= t * Math.pow(x, a);
+    } else {
+      t = logGamma(a + b) - logGamma(a) - logGamma(b) + u + Math.log(s);
+      s = t < Constants.MINLOG ? 0.0 : Math.exp(t);
+    }
+    return s;
+  }
+
+  /**
+   * Returns the Gamma function computed by Stirling's formula; formerly named 
<tt>stirf</tt>. The polynomial STIR is
+   * valid for 33 <= x <= 172.
+   */
+  static double stirlingFormula(double x) {
+    double[] coefficients = {
+      7.87311395793093628397E-4,
+      -2.29549961613378126380E-4,
+      -2.68132617805781232825E-3,
+      3.47222221605458667310E-3,
+      8.33333333333482257126E-2,
+    };
+
+    double w = 1.0 / x;
+    double y = Math.exp(x);
+
+    w = 1.0 + w * Polynomial.polevl(w, coefficients, 4);
+
+    if (x > MAXSTIR) {
+      /* Avoid overflow in Math.pow() */
+      double v = Math.pow(x, 0.5 * x - 0.25);
+      y = v * (v / y);
+    } else {
+      y = Math.pow(x, x - 0.5) / y;
+    }
+    y = Constants.SQTPI * y * w;
+    return y;
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/jet/stat/Probability.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/mahout/math/jet/stat/Probability.java 
b/core/src/main/java/org/apache/mahout/math/jet/stat/Probability.java
new file mode 100644
index 0000000..bcd1a86
--- /dev/null
+++ b/core/src/main/java/org/apache/mahout/math/jet/stat/Probability.java
@@ -0,0 +1,203 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+Copyright 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its 
documentation for any purpose 
+is hereby granted without fee, provided that the above copyright notice appear 
in all copies and 
+that both that copyright notice and this permission notice appear in 
supporting documentation. 
+CERN makes no representations about the suitability of this software for any 
purpose. 
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.math.jet.stat;
+
+import org.apache.mahout.math.jet.random.Normal;
+
+/** Partially deprecated until unit tests are in place.  Until this time, this 
class/interface is unsupported. */
+public final class Probability {
+
+  private static final Normal UNIT_NORMAL = new Normal(0, 1, null);
+
+  private Probability() {
+  }
+
+  /**
+   * Returns the area from zero to <tt>x</tt> under the beta density function.
+   * <pre>
+   *                          x
+   *            -             -
+   *           | (a+b)       | |  a-1      b-1
+   * P(x)  =  ----------     |   t    (1-t)    dt
+   *           -     -     | |
+   *          | (a) | (b)   -
+   *                         0
+   * </pre>
+   * This function is identical to the incomplete beta integral function 
<tt>Gamma.incompleteBeta(a, b, x)</tt>.
+   *
+   * The complemented function is
+   *
+   * <tt>1 - P(1-x)  =  Gamma.incompleteBeta( b, a, x )</tt>;
+   */
+  public static double beta(double a, double b, double x) {
+    return Gamma.incompleteBeta(a, b, x);
+  }
+
+  /**
+   * Returns the integral from zero to <tt>x</tt> of the gamma probability 
density function.
+   * <pre>
+   *
+   *          alpha     - x
+   *       beta        |     alpha-1  -beta t
+   * y =  ---------    |    t         e        dt
+   *       -           |
+   *      | (alpha)   -  0
+   * </pre>
+   * The incomplete gamma integral is used, according to the relation
+   *
+   * <tt>y = Gamma.incompleteGamma( alpha, beta*x )</tt>.
+   *
+   * See 
http://en.wikipedia.org/wiki/Gamma_distribution#Probability_density_function
+   *
+   * @param alpha the shape parameter of the gamma distribution.
+   * @param beta the rate parameter of the gamma distribution.
+   * @param x integration end point.
+   */
+  public static double gamma(double alpha, double beta, double x) {
+    if (x < 0.0) {
+      return 0.0;
+    }
+    return Gamma.incompleteGamma(alpha, beta * x);
+  }
+
+  /**
+   * Returns the sum of the terms <tt>0</tt> through <tt>k</tt> of the 
Negative Binomial Distribution.
+   * {@code
+   *   k
+   *   --  ( n+j-1 )   n      j
+   *   >   (       )  p  (1-p)
+   *   --  (   j   )
+   *  j=0
+   * }
+   * In a sequence of Bernoulli trials, this is the probability that 
<tt>k</tt> or fewer failures precede the
+   * <tt>n</tt>-th success. <p> The terms are not computed individually; 
instead the incomplete beta integral is
+   * employed, according to the formula <p> <tt>y = negativeBinomial( k, n, p 
) = Gamma.incompleteBeta( n, k+1, p
+   * )</tt>.
+   *
+   * All arguments must be positive,
+   *
+   * @param k end term.
+   * @param n the number of trials.
+   * @param p the probability of success (must be in <tt>(0.0,1.0)</tt>).
+   */
+  public static double negativeBinomial(int k, int n, double p) {
+    if (p < 0.0 || p > 1.0) {
+      throw new IllegalArgumentException();
+    }
+    if (k < 0) {
+      return 0.0;
+    }
+
+    return Gamma.incompleteBeta(n, k + 1, p);
+  }
+
+  /**
+   * Returns the area under the Normal (Gaussian) probability density 
function, integrated from minus infinity to
+   * <tt>x</tt> (assumes mean is zero, variance is one).
+   * {@code
+   *                            x
+   *                             -
+   *                   1        | |          2
+   *  normal(x)  = ---------    |    exp( - t /2 ) dt
+   *               sqrt(2pi)  | |
+   *                           -
+   *                          -inf.
+   *
+   *             =  ( 1 + erf(z) ) / 2
+   *             =  erfc(z) / 2
+   * }
+   * where <tt>z = x/sqrt(2)</tt>. Computation is via the functions 
<tt>errorFunction</tt> and
+   * <tt>errorFunctionComplement</tt>.
+   * <p>
+   * Computed using method 26.2.17 from Abramovitz and Stegun (see 
http://www.math.sfu.ca/~cbm/aands/page_932.htm
+   * and 
http://en.wikipedia.org/wiki/Normal_distribution#Numerical_approximations_of_the_normal_cdf
+   */
+
+  public static double normal(double a) {
+    if (a < 0) {
+      return 1 - normal(-a);
+    }
+    double b0 = 0.2316419;
+    double b1 = 0.319381530;
+    double b2 = -0.356563782;
+    double b3 = 1.781477937;
+    double b4 = -1.821255978;
+    double b5 = 1.330274429;
+    double t = 1 / (1 + b0 * a);
+    return 1 - UNIT_NORMAL.pdf(a) * t * (b1 + t * (b2 + t * (b3 + t * (b4 + t 
* b5))));
+  }
+
+  /**
+   * Returns the area under the Normal (Gaussian) probability density 
function, integrated from minus infinity to
+   * <tt>x</tt>.
+   * {@code
+   *                            x
+   *                             -
+   *                   1        | |                 2
+   *  normal(x)  = ---------    |    exp( - (t-mean) / 2v ) dt
+   *               sqrt(2pi*v)| |
+   *                           -
+   *                          -inf.
+   *
+   * }
+   * where <tt>v = variance</tt>. Computation is via the functions 
<tt>errorFunction</tt>.
+   *
+   * @param mean     the mean of the normal distribution.
+   * @param variance the variance of the normal distribution.
+   * @param x        the integration limit.
+   */
+  public static double normal(double mean, double variance, double x) {
+    return normal((x - mean) / Math.sqrt(variance));
+  }
+
+  /**
+   * Returns the sum of the first <tt>k</tt> terms of the Poisson distribution.
+   * <pre>
+   *   k         j
+   *   --   -m  m
+   *   >   e    --
+   *   --       j!
+   *  j=0
+   * </pre>
+   * The terms are not summed directly; instead the incomplete gamma integral 
is employed, according to the relation <p>
+   * <tt>y = poisson( k, m ) = Gamma.incompleteGammaComplement( k+1, m )</tt>.
+   *
+   * The arguments must both be positive.
+   *
+   * @param k    number of terms.
+   * @param mean the mean of the poisson distribution.
+   */
+  public static double poisson(int k, double mean) {
+    if (mean < 0) {
+      throw new IllegalArgumentException();
+    }
+    if (k < 0) {
+      return 0.0;
+    }
+    return Gamma.incompleteGammaComplement(k + 1, mean);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/jet/stat/package-info.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/mahout/math/jet/stat/package-info.java 
b/core/src/main/java/org/apache/mahout/math/jet/stat/package-info.java
new file mode 100644
index 0000000..1d4d7bd
--- /dev/null
+++ b/core/src/main/java/org/apache/mahout/math/jet/stat/package-info.java
@@ -0,0 +1,5 @@
+/**
+ * Tools for basic and advanced statistics: Estimators, Gamma functions, Beta 
functions, Probabilities,
+ * Special integrals, etc.
+ */
+package org.apache.mahout.math.jet.stat;

http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/list/AbstractList.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/mahout/math/list/AbstractList.java 
b/core/src/main/java/org/apache/mahout/math/list/AbstractList.java
new file mode 100644
index 0000000..c672f40
--- /dev/null
+++ b/core/src/main/java/org/apache/mahout/math/list/AbstractList.java
@@ -0,0 +1,247 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+ /*
+Copyright Ã¯Â¿Â½ 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its 
documentation for any purpose 
+is hereby granted without fee, provided that the above copyright notice appear 
in all copies and 
+that both that copyright notice and this permission notice appear in 
supporting documentation. 
+CERN makes no representations about the suitability of this software for any 
purpose. 
+It is provided "as is" without expressed or implied warranty.
+*/
+/*
+Copyright 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its 
documentation for any purpose 
+is hereby granted without fee, provided that the above copyright notice appear 
in all copies and 
+that both that copyright notice and this permission notice appear in 
supporting documentation. 
+CERN makes no representations about the suitability of this software for any 
purpose. 
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.math.list;
+
+import org.apache.mahout.math.PersistentObject;
+
+/**
+ * Abstract base class for resizable lists holding objects or primitive data 
types such as
+ * {@code int}, {@code float}, etc.
+ * First see the <a href="package-summary.html">package summary</a> and javadoc
+ * <a href="package-tree.html">tree view</a> to get the broad picture.
+ * <p>
+ * <b>Note that this implementation is not synchronized.</b>
+ *
+ * @author [email protected]
+ * @version 1.0, 09/24/99
+ * @see     java.util.ArrayList
+ * @see      java.util.Vector
+ * @see      java.util.Arrays
+ */
+public abstract class AbstractList extends PersistentObject {
+  
+  public abstract int size();
+  
+  public boolean isEmpty() {
+    return size() == 0;
+  }
+
+  /**
+   * Inserts <tt>length</tt> dummy elements before the specified position into 
the receiver. Shifts the element
+   * currently at that position (if any) and any subsequent elements to the 
right. <b>This method must set the new size
+   * to be <tt>size()+length</tt></b>.
+   *
+   * @param index  index before which to insert dummy elements (must be in 
[0,size])..
+   * @param length number of dummy elements to be inserted.
+   * @throws IndexOutOfBoundsException if <tt>index &lt; 0 || index &gt; 
size()</tt>.
+   */
+  protected abstract void beforeInsertDummies(int index, int length);
+
+  /** Checks if the given index is in range. */
+  protected static void checkRange(int index, int theSize) {
+    if (index >= theSize || index < 0) {
+      throw new IndexOutOfBoundsException("Index: " + index + ", Size: " + 
theSize);
+    }
+  }
+
+  /**
+   * Checks if the given range is within the contained array's bounds.
+   *
+   * @throws IndexOutOfBoundsException if <tt>to!=from-1 || from&lt;0 || 
from&gt;to || to&gt;=size()</tt>.
+   */
+  protected static void checkRangeFromTo(int from, int to, int theSize) {
+    if (to == from - 1) {
+      return;
+    }
+    if (from < 0 || from > to || to >= theSize) {
+      throw new IndexOutOfBoundsException("from: " + from + ", to: " + to + ", 
size=" + theSize);
+    }
+  }
+
+  /**
+   * Removes all elements from the receiver.  The receiver will be empty after 
this call returns, but keep its current
+   * capacity.
+   */
+  public void clear() {
+    removeFromTo(0, size() - 1);
+  }
+
+  /**
+   * Sorts the receiver into ascending order. This sort is guaranteed to be 
<i>stable</i>:  equal elements will not be
+   * reordered as a result of the sort.<p>
+   *
+   * The sorting algorithm is a modified mergesort (in which the merge is 
omitted if the highest element in the low
+   * sublist is less than the lowest element in the high sublist).  This 
algorithm offers guaranteed n*log(n)
+   * performance, and can approach linear performance on nearly sorted lists.
+   *
+   * <p><b>You should never call this method unless you are sure that this 
particular sorting algorithm is the right one
+   * for your data set.</b> It is generally better to call <tt>sort()</tt> or 
<tt>sortFromTo(...)</tt> instead, because
+   * those methods automatically choose the best sorting algorithm.
+   */
+  public final void mergeSort() {
+    mergeSortFromTo(0, size() - 1);
+  }
+
+  /**
+   * Sorts the receiver into ascending order. This sort is guaranteed to be 
<i>stable</i>:  equal elements will not be
+   * reordered as a result of the sort.<p>
+   *
+   * The sorting algorithm is a modified mergesort (in which the merge is 
omitted if the highest element in the low
+   * sublist is less than the lowest element in the high sublist).  This 
algorithm offers guaranteed n*log(n)
+   * performance, and can approach linear performance on nearly sorted lists.
+   *
+   * <p><b>You should never call this method unless you are sure that this 
particular sorting algorithm is the right one
+   * for your data set.</b> It is generally better to call <tt>sort()</tt> or 
<tt>sortFromTo(...)</tt> instead, because
+   * those methods automatically choose the best sorting algorithm.
+   *
+   * @param from the index of the first element (inclusive) to be sorted.
+   * @param to   the index of the last element (inclusive) to be sorted.
+   * @throws IndexOutOfBoundsException if <tt>(from&lt;0 || from&gt;to || 
to&gt;=size()) && to!=from-1</tt>.
+   */
+  public abstract void mergeSortFromTo(int from, int to);
+
+  /**
+   * Sorts the receiver into ascending order.  The sorting algorithm is a 
tuned quicksort, adapted from Jon L. Bentley
+   * and M. Douglas McIlroy's "Engineering a Sort Function", Software-Practice 
and Experience, Vol. 23(11) P. 1249-1265
+   * (November 1993).  This algorithm offers n*log(n) performance on many data 
sets that cause other quicksorts to
+   * degrade to quadratic performance.
+   *
+   * <p><b>You should never call this method unless you are sure that this 
particular sorting algorithm is the right one
+   * for your data set.</b> It is generally better to call <tt>sort()</tt> or 
<tt>sortFromTo(...)</tt> instead, because
+   * those methods automatically choose the best sorting algorithm.
+   */
+  public final void quickSort() {
+    quickSortFromTo(0, size() - 1);
+  }
+
+  /**
+   * Sorts the specified range of the receiver into ascending order.  The 
sorting algorithm is a tuned quicksort,
+   * adapted from Jon L. Bentley and M. Douglas McIlroy's "Engineering a Sort 
Function", Software-Practice and
+   * Experience, Vol. 23(11) P. 1249-1265 (November 1993).  This algorithm 
offers n*log(n) performance on many data sets
+   * that cause other quicksorts to degrade to quadratic performance.
+   *
+   * <p><b>You should never call this method unless you are sure that this 
particular sorting algorithm is the right one
+   * for your data set.</b> It is generally better to call <tt>sort()</tt> or 
<tt>sortFromTo(...)</tt> instead, because
+   * those methods automatically choose the best sorting algorithm.
+   *
+   * @param from the index of the first element (inclusive) to be sorted.
+   * @param to   the index of the last element (inclusive) to be sorted.
+   * @throws IndexOutOfBoundsException if <tt>(from&lt;0 || from&gt;to || 
to&gt;=size()) && to!=from-1</tt>.
+   */
+  public abstract void quickSortFromTo(int from, int to);
+
+  /**
+   * Removes the element at the specified position from the receiver. Shifts 
any subsequent elements to the left.
+   *
+   * @param index the index of the element to removed.
+   * @throws IndexOutOfBoundsException if <tt>index &lt; 0 || index &gt;= 
size()</tt>.
+   */
+  public void remove(int index) {
+    removeFromTo(index, index);
+  }
+
+  /**
+   * Removes from the receiver all elements whose index is between 
<code>from</code>, inclusive and <code>to</code>,
+   * inclusive.  Shifts any succeeding elements to the left (reduces their 
index). This call shortens the list by
+   * <tt>(to - from + 1)</tt> elements.
+   *
+   * @param fromIndex index of first element to be removed.
+   * @param toIndex   index of last element to be removed.
+   * @throws IndexOutOfBoundsException if <tt>(from&lt;0 || from&gt;to || 
to&gt;=size()) && to!=from-1</tt>.
+   */
+  public abstract void removeFromTo(int fromIndex, int toIndex);
+
+  /** Reverses the elements of the receiver. Last becomes first, second last 
becomes second first, and so on. */
+  public abstract void reverse();
+
+  /**
+   * Sets the size of the receiver. If the new size is greater than the 
current size, new null or zero items are added
+   * to the end of the receiver. If the new size is less than the current 
size, all components at index newSize and
+   * greater are discarded. This method does not release any superfluos 
internal memory. Use method <tt>trimToSize</tt>
+   * to release superfluos internal memory.
+   *
+   * @param newSize the new size of the receiver.
+   * @throws IndexOutOfBoundsException if <tt>newSize &lt; 0</tt>.
+   */
+  public void setSize(int newSize) {
+    if (newSize < 0) {
+      throw new IndexOutOfBoundsException("newSize:" + newSize);
+    }
+
+    int currentSize = size();
+    if (newSize != currentSize) {
+      if (newSize > currentSize) {
+        beforeInsertDummies(currentSize, newSize - currentSize);
+      } else if (newSize < currentSize) {
+        removeFromTo(newSize, currentSize - 1);
+      }
+    }
+  }
+
+  /**
+   * Sorts the receiver into ascending order.
+   *
+   * The sorting algorithm is dynamically chosen according to the 
characteristics of the data set.
+   *
+   * This implementation simply calls <tt>sortFromTo(...)</tt>. Override 
<tt>sortFromTo(...)</tt> if you can determine
+   * which sort is most appropriate for the given data set.
+   */
+  public final void sort() {
+    sortFromTo(0, size() - 1);
+  }
+
+  /**
+   * Sorts the specified range of the receiver into ascending order.
+   *
+   * The sorting algorithm is dynamically chosen according to the 
characteristics of the data set. This default
+   * implementation simply calls quickSort. Override this method if you can 
determine which sort is most appropriate for
+   * the given data set.
+   *
+   * @param from the index of the first element (inclusive) to be sorted.
+   * @param to   the index of the last element (inclusive) to be sorted.
+   * @throws IndexOutOfBoundsException if <tt>(from&lt;0 || from&gt;to || 
to&gt;=size()) && to!=from-1</tt>.
+   */
+  public void sortFromTo(int from, int to) {
+    quickSortFromTo(from, to);
+  }
+
+  /**
+   * Trims the capacity of the receiver to be the receiver's current size. 
Releases any superfluos internal memory. An
+   * application can use this operation to minimize the storage of the 
receiver. <p> This default implementation does
+   * nothing. Override this method in space efficient implementations.
+   */
+  public void trimToSize() {
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/list/AbstractObjectList.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/mahout/math/list/AbstractObjectList.java 
b/core/src/main/java/org/apache/mahout/math/list/AbstractObjectList.java
new file mode 100644
index 0000000..a1a5899
--- /dev/null
+++ b/core/src/main/java/org/apache/mahout/math/list/AbstractObjectList.java
@@ -0,0 +1,80 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+Copyright 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its 
documentation for any purpose 
+is hereby granted without fee, provided that the above copyright notice appear 
in all copies and 
+that both that copyright notice and this permission notice appear in 
supporting documentation. 
+CERN makes no representations about the suitability of this software for any 
purpose. 
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.math.list;
+
+import java.util.Collection;
+
+/**
+ Abstract base class for resizable lists holding objects or primitive data 
types such as <code>int</code>,
+ <code>float</code>, etc.First see the <a href="package-summary.html">package 
summary</a> and
+ javadoc <a href="package-tree.html">tree view</a> to get the broad picture.
+ <p>
+ <b>Note that this implementation is not synchronized.</b>
+
+ @author [email protected]
+ @version 1.0, 09/24/99
+ @see      java.util.ArrayList
+ @see      java.util.Vector
+ @see      java.util.Arrays
+ */
+public abstract class AbstractObjectList<T> extends AbstractList {
+
+  /**
+   * Appends all of the elements of the specified Collection to the receiver.
+   *
+   * @throws ClassCastException if an element in the collection is not of the 
same parameter type of the receiver.
+   */
+  public void addAllOf(Collection<T> collection) {
+    this.beforeInsertAllOf(size(), collection);
+  }
+
+  /**
+   * Inserts all elements of the specified collection before the specified 
position into the receiver. Shifts the
+   * element currently at that position (if any) and any subsequent elements 
to the right (increases their indices).
+   *
+   * @param index      index before which to insert first element from the 
specified collection.
+   * @param collection the collection to be inserted
+   * @throws ClassCastException        if an element in the collection is not 
of the same parameter type of the
+   *                                   receiver.
+   * @throws IndexOutOfBoundsException if <tt>index &lt; 0 || index &gt; 
size()</tt>.
+   */
+  public void beforeInsertAllOf(int index, Collection<T> collection) {
+    this.beforeInsertDummies(index, collection.size());
+    this.replaceFromWith(index, collection);
+  }
+
+  /**
+   * Replaces the part of the receiver starting at <code>from</code> 
(inclusive) with all the elements of the specified
+   * collection. Does not alter the size of the receiver. Replaces exactly 
<tt>Math.max(0,Math.min(size()-from,
+   * other.size()))</tt> elements.
+   *
+   * @param from  the index at which to copy the first element from the 
specified collection.
+   * @param other Collection to replace part of the receiver
+   * @throws IndexOutOfBoundsException if <tt>index &lt; 0 || index &gt;= 
size()</tt>.
+   */
+  public abstract void replaceFromWith(int from, Collection<T> other);
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/list/ObjectArrayList.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/mahout/math/list/ObjectArrayList.java 
b/core/src/main/java/org/apache/mahout/math/list/ObjectArrayList.java
new file mode 100644
index 0000000..c41141f
--- /dev/null
+++ b/core/src/main/java/org/apache/mahout/math/list/ObjectArrayList.java
@@ -0,0 +1,419 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+Copyright ï¿½ 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its 
documentation for any purpose 
+is hereby granted without fee, provided that the above copyright notice appear 
in all copies and 
+that both that copyright notice and this permission notice appear in 
supporting documentation. 
+CERN makes no representations about the suitability of this software for any 
purpose. 
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.math.list;
+
+import org.apache.mahout.math.function.ObjectProcedure;
+
+import java.util.Collection;
+
+/**
+ Resizable list holding <code>${valueType}</code> elements; implemented with 
arrays.
+*/
+
+public class ObjectArrayList<T> extends AbstractObjectList<T> {
+
+  /**
+   * The array buffer into which the elements of the list are stored. The 
capacity of the list is the length of this
+   * array buffer.
+   */
+  private Object[] elements;
+  private int size;
+
+  /** Constructs an empty list. */
+  public ObjectArrayList() {
+    this(10);
+  }
+
+  /**
+   * Constructs a list containing the specified elements. The initial size and 
capacity of the list is the length of the
+   * array.
+   *
+   * <b>WARNING:</b> For efficiency reasons and to keep memory usage low, 
<b>the array is not copied</b>. So if
+   * subsequently you modify the specified array directly via the [] operator, 
be sure you know what you're doing.
+   *
+   * @param elements the array to be backed by the the constructed list
+   */
+  public ObjectArrayList(T[] elements) {
+    elements(elements);
+  }
+
+  /**
+   * Constructs an empty list with the specified initial capacity.
+   *
+   * @param initialCapacity the number of elements the receiver can hold 
without auto-expanding itself by allocating new
+   *                        internal memory.
+   */
+  @SuppressWarnings("unchecked")
+  public ObjectArrayList(int initialCapacity) {
+    elements = new Object[initialCapacity];
+    size = 0;
+  }
+
+  /**
+   * Appends the specified element to the end of this list.
+   *
+   * @param element element to be appended to this list.
+   */
+  public void add(T element) {
+    // overridden for performance only.
+    if (size == elements.length) {
+      ensureCapacity(size + 1);
+    }
+    elements[size++] = element;
+  }
+
+  /**
+   * Inserts the specified element before the specified position into the 
receiver. Shifts the element currently at that
+   * position (if any) and any subsequent elements to the right.
+   *
+   * @param index   index before which the specified element is to be inserted 
(must be in [0,size]).
+   * @param element element to be inserted.
+   * @throws IndexOutOfBoundsException index is out of range (<tt>index &lt; 0 
|| index &gt; size()</tt>).
+   */
+  public void beforeInsert(int index, T element) {
+    // overridden for performance only.
+    if (size == index) {
+      add(element);
+      return;
+    }
+    if (index > size || index < 0) {
+      throw new IndexOutOfBoundsException("Index: " + index + ", Size: " + 
size);
+    }
+    ensureCapacity(size + 1);
+    System.arraycopy(elements, index, elements, index + 1, size - index);
+    elements[index] = element;
+    size++;
+  }
+
+
+  /**
+   * Returns a deep copy of the receiver.
+   *
+   * @return a deep copy of the receiver.
+   */
+  @SuppressWarnings("unchecked")
+  @Override
+  public Object clone() {
+    // overridden for performance only.
+    return new ObjectArrayList<>((T[]) elements.clone());
+  }
+
+  /**
+   * Returns a deep copy of the receiver; uses <code>clone()</code> and casts 
the result.
+   *
+   * @return a deep copy of the receiver.
+   */
+  @SuppressWarnings("unchecked")
+  public ObjectArrayList<T> copy() {
+    return (ObjectArrayList<T>) clone();
+  }
+
+  /**
+   * Returns the elements currently stored, including invalid elements between 
size and capacity, if any.
+   *
+   * <b>WARNING:</b> For efficiency reasons and to keep memory usage low, 
<b>the array is not copied</b>. So if
+   * subsequently you modify the returned array directly via the [] operator, 
be sure you know what you're doing.
+   *
+   * @return the elements currently stored.
+   */
+  @SuppressWarnings("unchecked")
+  public <Q> Q[] elements() {
+    return (Q[])elements;
+  }
+
+  /**
+   * Sets the receiver's elements to be the specified array (not a copy of it).
+   *
+   * The size and capacity of the list is the length of the array. 
<b>WARNING:</b> For efficiency reasons and to keep
+   * memory usage low, <b>the array is not copied</b>. So if subsequently you 
modify the specified array directly via
+   * the [] operator, be sure you know what you're doing.
+   *
+   * @param elements the new elements to be stored.
+   */
+  public void elements(T[] elements) {
+    this.elements = elements;
+    this.size = elements.length;
+  }
+
+  /**
+   * Ensures that the receiver can hold at least the specified number of 
elements without needing to allocate new
+   * internal memory. If necessary, allocates new internal memory and 
increases the capacity of the receiver.
+   *
+   * @param minCapacity the desired minimum capacity.
+   */
+  public void ensureCapacity(int minCapacity) {
+    elements = org.apache.mahout.math.Arrays.ensureCapacity(elements, 
minCapacity);
+  }
+
+  /**
+   * Compares the specified Object with the receiver. Returns true if and only 
if the specified Object is also an
+   * ArrayList of the same type, both Lists have the same size, and all 
corresponding pairs of elements in the two Lists
+   * are identical. In other words, two Lists are defined to be equal if they 
contain the same elements in the same
+   * order.
+   *
+   * @param otherObj the Object to be compared for equality with the receiver.
+   * @return true if the specified Object is equal to the receiver.
+   */
+  @Override
+  @SuppressWarnings("unchecked")
+  public boolean equals(Object otherObj) { //delta
+    // overridden for performance only.
+    if (!(otherObj instanceof ObjectArrayList)) {
+      return super.equals(otherObj);
+    }
+    if (this == otherObj) {
+      return true;
+    }
+    if (otherObj == null) {
+      return false;
+    }
+    ObjectArrayList<?> other = (ObjectArrayList<?>) otherObj;
+    if (size() != other.size()) {
+      return false;
+    }
+
+    Object[] theElements = elements();
+    Object[] otherElements = other.elements();
+    for (int i = size(); --i >= 0;) {
+      if (theElements[i] != otherElements[i]) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /**
+   * Applies a procedure to each element of the receiver, if any. Starts at 
index 0, moving rightwards.
+   *
+   * @param procedure the procedure to be applied. Stops iteration if the 
procedure returns <tt>false</tt>, otherwise
+   *                  continues.
+   * @return <tt>false</tt> if the procedure stopped before all elements where 
iterated over, <tt>true</tt> otherwise.
+   */
+  @SuppressWarnings("unchecked")
+  public boolean forEach(ObjectProcedure<T> procedure) {
+    T[] theElements = (T[]) elements;
+    int theSize = size;
+
+    for (int i = 0; i < theSize;) {
+      if (!procedure.apply(theElements[i++])) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /**
+   * Returns the element at the specified position in the receiver.
+   *
+   * @param index index of element to return.
+   * @throws IndexOutOfBoundsException index is out of range (index &lt; 0 || 
index &gt;= size()).
+   */
+  @SuppressWarnings("unchecked")
+  public T get(int index) {
+    // overridden for performance only.
+    if (index >= size || index < 0) {
+      throw new IndexOutOfBoundsException("Index: " + index + ", Size: " + 
size);
+    }
+    return (T) elements[index];
+  }
+
+  /**
+   * Returns the element at the specified position in the receiver; 
<b>WARNING:</b> Does not check preconditions.
+   * Provided with invalid parameters this method may return invalid elements 
without throwing any exception! <b>You
+   * should only use this method when you are absolutely sure that the index 
is within bounds.</b> Precondition
+   * (unchecked): <tt>index &gt;= 0 && index &lt; size()</tt>.
+   *
+   * @param index index of element to return.
+   */
+  @SuppressWarnings("unchecked")
+  public T getQuick(int index) {
+    return (T) elements[index];
+  }
+
+  /**
+   * Returns the index of the first occurrence of the specified element. 
Returns <code>-1</code> if the receiver does
+   * not contain this element. Searches between <code>from</code>, inclusive 
and <code>to</code>, inclusive. Tests for
+   * identity.
+   *
+   * @param element element to search for.
+   * @param from    the leftmost search position, inclusive.
+   * @param to      the rightmost search position, inclusive.
+   * @return the index of the first occurrence of the element in the receiver; 
returns <code>-1</code> if the element is
+   *         not found.
+   * @throws IndexOutOfBoundsException index is out of range (<tt>size()&gt;0 
&& (from&lt;0 || from&gt;to ||
+   *                                   to&gt;=size())</tt>).
+   */
+  public int indexOfFromTo(T element, int from, int to) {
+    // overridden for performance only.
+    if (size == 0) {
+      return -1;
+    }
+    checkRangeFromTo(from, to, size);
+
+    Object[] theElements = elements;
+    for (int i = from; i <= to; i++) {
+      if (element == theElements[i]) {
+        return i;
+      } //found
+    }
+    return -1; //not found
+  }
+
+  /**
+   * Returns the index of the last occurrence of the specified element. 
Returns <code>-1</code> if the receiver does not
+   * contain this element. Searches beginning at <code>to</code>, inclusive 
until <code>from</code>, inclusive. Tests
+   * for identity.
+   *
+   * @param element element to search for.
+   * @param from    the leftmost search position, inclusive.
+   * @param to      the rightmost search position, inclusive.
+   * @return the index of the last occurrence of the element in the receiver; 
returns <code>-1</code> if the element is
+   *         not found.
+   * @throws IndexOutOfBoundsException index is out of range (<tt>size()&gt;0 
&& (from&lt;0 || from&gt;to ||
+   *                                   to&gt;=size())</tt>).
+   */
+  public int lastIndexOfFromTo(T element, int from, int to) {
+    // overridden for performance only.
+    if (size == 0) {
+      return -1;
+    }
+    checkRangeFromTo(from, to, size);
+
+    Object[] theElements = elements;
+    for (int i = to; i >= from; i--) {
+      if (element == theElements[i]) {
+        return i;
+      } //found
+    }
+    return -1; //not found
+  }
+
+  /**
+   * Returns a new list of the part of the receiver between <code>from</code>, 
inclusive, and <code>to</code>,
+   * inclusive.
+   *
+   * @param from the index of the first element (inclusive).
+   * @param to   the index of the last element (inclusive).
+   * @return a new list
+   * @throws IndexOutOfBoundsException index is out of range (<tt>size()&gt;0 
&& (from&lt;0 || from&gt;to ||
+   *                                   to&gt;=size())</tt>).
+   */
+  @SuppressWarnings("unchecked")
+  public AbstractObjectList<T> partFromTo(int from, int to) {
+    if (size == 0) {
+      return new ObjectArrayList<>(0);
+    }
+
+    checkRangeFromTo(from, to, size);
+
+    Object[] part = new Object[to - from + 1];
+    System.arraycopy(elements, from, part, 0, to - from + 1);
+    return new ObjectArrayList<>((T[]) part);
+  }
+
+  /** Reverses the elements of the receiver. Last becomes first, second last 
becomes second first, and so on. */
+  @Override
+  public void reverse() {
+    // overridden for performance only.
+    int limit = size / 2;
+    int j = size - 1;
+
+    Object[] theElements = elements;
+    for (int i = 0; i < limit;) { //swap
+      Object tmp = theElements[i];
+      theElements[i++] = theElements[j];
+      theElements[j--] = tmp;
+    }
+  }
+
+  /**
+   * Replaces the element at the specified position in the receiver with the 
specified element.
+   *
+   * @param index   index of element to replace.
+   * @param element element to be stored at the specified position.
+   * @throws IndexOutOfBoundsException index is out of range (index &lt; 0 || 
index &gt;= size()).
+   */
+  public void set(int index, T element) {
+    // overridden for performance only.
+    if (index >= size || index < 0) {
+      throw new IndexOutOfBoundsException("Index: " + index + ", Size: " + 
size);
+    }
+    elements[index] = element;
+  }
+
+  /**
+   * Replaces the element at the specified position in the receiver with the 
specified element; <b>WARNING:</b> Does not
+   * check preconditions. Provided with invalid parameters this method may 
access invalid indexes without throwing any
+   * exception! <b>You should only use this method when you are absolutely 
sure that the index is within bounds.</b>
+   * Precondition (unchecked): {@code index >= 0 && index < size()}.
+   *
+   * @param index   index of element to replace.
+   * @param element element to be stored at the specified position.
+   */
+  public void setQuick(int index, T element) {
+    elements[index] = element;
+  }
+
+  /**
+   * Trims the capacity of the receiver to be the receiver's current size. 
Releases any superfluous internal memory. An
+   * application can use this operation to minimize the storage of the 
receiver.
+   */
+  @Override
+  public void trimToSize() {
+    elements = org.apache.mahout.math.Arrays.trimToCapacity(elements, size());
+  }
+
+  @Override
+  public void removeFromTo(int fromIndex, int toIndex) {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public void replaceFromWith(int from, Collection<T> other) {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  protected void beforeInsertDummies(int index, int length) {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public void mergeSortFromTo(int from, int to) {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public void quickSortFromTo(int from, int to) {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public int size() {
+    return size;
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/list/SimpleLongArrayList.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/mahout/math/list/SimpleLongArrayList.java 
b/core/src/main/java/org/apache/mahout/math/list/SimpleLongArrayList.java
new file mode 100644
index 0000000..1a765eb
--- /dev/null
+++ b/core/src/main/java/org/apache/mahout/math/list/SimpleLongArrayList.java
@@ -0,0 +1,102 @@
+/*
+Copyright 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its 
documentation for any purpose 
+is hereby granted without fee, provided that the above copyright notice appear 
in all copies and 
+that both that copyright notice and this permission notice appear in 
supporting documentation. 
+CERN makes no representations about the suitability of this software for any 
purpose. 
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.math.list;
+
+/**
+ Resizable list holding <code>long</code> elements; implemented with arrays; 
not efficient; just to
+ demonstrate which methods you must override to implement a fully functional 
list.
+ */
+public class SimpleLongArrayList extends AbstractLongList {
+
+  /**
+   * The array buffer into which the elements of the list are stored. The 
capacity of the list is the length of this
+   * array buffer.
+   */
+  private long[] elements;
+
+  /** Constructs an empty list. */
+  public SimpleLongArrayList() {
+    this(10);
+  }
+
+  /**
+   * Constructs a list containing the specified elements. The initial size and 
capacity of the list is the length of the
+   * array.
+   *
+   * <b>WARNING:</b> For efficiency reasons and to keep memory usage low, 
<b>the array is not copied</b>. So if
+   * subsequently you modify the specified array directly via the [] operator, 
be sure you know what you're doing.
+   *
+   * @param elements the array to be backed by the the constructed list
+   */
+  public SimpleLongArrayList(long[] elements) {
+    elements(elements);
+  }
+
+  /**
+   * Constructs an empty list with the specified initial capacity.
+   *
+   * @param initialCapacity the number of elements the receiver can hold 
without auto-expanding itself by allocating new
+   *                        internal memory.
+   */
+  private SimpleLongArrayList(int initialCapacity) {
+    if (initialCapacity < 0) {
+      throw new IllegalArgumentException("Illegal Capacity: " + 
initialCapacity);
+    }
+
+    this.elements(new long[initialCapacity]);
+    size = 0;
+  }
+
+  /**
+   * Ensures that the receiver can hold at least the specified number of 
elements without needing to allocate new
+   * internal memory. If necessary, allocates new internal memory and 
increases the capacity of the receiver.
+   *
+   * @param minCapacity the desired minimum capacity.
+   */
+  @Override
+  public void ensureCapacity(int minCapacity) {
+    elements = org.apache.mahout.math.Arrays.ensureCapacity(elements, 
minCapacity);
+  }
+
+  /**
+   * Returns the element at the specified position in the receiver; 
<b>WARNING:</b> Does not check preconditions.
+   * Provided with invalid parameters this method may return invalid elements 
without throwing any exception! <b>You
+   * should only use this method when you are absolutely sure that the index 
is within bounds.</b> Precondition
+   * (unchecked): <tt>index &gt;= 0 && index &lt; size()</tt>.
+   *
+   * @param index index of element to return.
+   */
+  @Override
+  protected long getQuick(int index) {
+    return elements[index];
+  }
+
+  /**
+   * Replaces the element at the specified position in the receiver with the 
specified element; <b>WARNING:</b> Does not
+   * check preconditions. Provided with invalid parameters this method may 
access invalid indexes without throwing any
+   * exception! <b>You should only use this method when you are absolutely 
sure that the index is within bounds.</b>
+   * Precondition (unchecked): <tt>index &gt;= 0 && index &lt; size()</tt>.
+   *
+   * @param index   index of element to replace.
+   * @param element element to be stored at the specified position.
+   */
+  @Override
+  protected void setQuick(int index, long element) {
+    elements[index] = element;
+  }
+
+  /**
+   * Trims the capacity of the receiver to be the receiver's current size. An 
application can use this operation to
+   * minimize the storage of the receiver.
+   */
+  @Override
+  public void trimToSize() {
+    elements = org.apache.mahout.math.Arrays.trimToCapacity(elements, size());
+  }
+}

[05/15] mahout git commit: NO-JIRA Trevors updates

Reply via email to