mdiggory 2003/07/06 00:18:08
Modified: math/src/java/org/apache/commons/math/stat StatUtils.java
Log:
Rolling Back StatUtils to previous version.
Revision Changes Path
1.12 +172 -104
jakarta-commons-sandbox/math/src/java/org/apache/commons/math/stat/StatUtils.java
Index: StatUtils.java
===================================================================
RCS file: /home/cvs/jakarta-commons-sandbox/math/src/java/org/apache/commons/math/stat/StatUtils.java,v
retrieving revision 1.11
retrieving revision 1.12
diff -u -r1.11 -r1.12
--- StatUtils.java 5 Jul 2003 18:29:35 -0000 1.11
+++ StatUtils.java 6 Jul 2003 07:18:08 -0000 1.12
@@ -53,21 +53,6 @@
*/
package org.apache.commons.math.stat;
-import org.apache.commons.math.stat.univariate.UnivariateStatistic;
-import org.apache.commons.math.stat.univariate.moment.GeometricMean;
-import org.apache.commons.math.stat.univariate.moment.Kurtosis;
-import org.apache.commons.math.stat.univariate.moment.Mean;
-import org.apache.commons.math.stat.univariate.moment.Skewness;
-import org.apache.commons.math.stat.univariate.moment.Variance;
-import org.apache.commons.math.stat.univariate.rank.Max;
-import org.apache.commons.math.stat.univariate.rank.Median;
-import org.apache.commons.math.stat.univariate.rank.Min;
-import org.apache.commons.math.stat.univariate.rank.Percentile;
-import org.apache.commons.math.stat.univariate.summary.Product;
-import org.apache.commons.math.stat.univariate.summary.Sum;
-import org.apache.commons.math.stat.univariate.summary.SumOfLogs;
-import org.apache.commons.math.stat.univariate.summary.SumOfSquares;
-
/**
* StatUtils provides easy static implementations of common double[] based
* statistical methods. These return a single result value or in some cases, as
@@ -77,52 +62,13 @@
*/
public class StatUtils {
- /** Sum Of Logs */
- private static UnivariateStatistic sumLog = new SumOfLogs();
-
- /** Product */
- private static UnivariateStatistic product = new Product();
-
- /** Geometric Mean */
- private static UnivariateStatistic geoMean = new GeometricMean();
-
- /** Mean */
- private static UnivariateStatistic mean = new Mean();
-
- /** Variance */
- private static UnivariateStatistic var = new Variance();
-
- /** Skewness */
- private static UnivariateStatistic skew = new Skewness();
-
- /** Kurtosis */
- private static UnivariateStatistic kurt = new Kurtosis();
-
- /** Min Of Logs */
- private static UnivariateStatistic min = new Min();
-
- /** Max */
- private static UnivariateStatistic max = new Max();
-
- /** Median */
- private static UnivariateStatistic median = new Median();
-
- /** Sum */
- private static UnivariateStatistic sum = new Sum();
-
- /** Sum Of Squares */
- private static UnivariateStatistic sumSq = new SumOfSquares();
-
- /** Percentile */
- private static Percentile percentile = new Percentile();
-
/**
* The sum of the values that have been added to Univariate.
* @param values Is a double[] containing the values
* @return the sum of the values or Double.NaN if the array is empty
*/
public static double sum(double[] values) {
- return sum.evaluate(values, 0, values.length);
+ return sum(values, 0, values.length);
}
/**
@@ -133,7 +79,12 @@
* @return the sum of the values or Double.NaN if the array is empty
*/
public static double sum(double[] values, int begin, int length) {
- return sum.evaluate(values, begin, length);
+ testInput(values, begin, length);
+ double accum = 0.0;
+ for (int i = begin; i < begin + length; i++) {
+ accum += values[i];
+ }
+ return accum;
}
/**
@@ -142,7 +93,7 @@
* @return the sum of the squared values or Double.NaN if the array is empty
*/
public static double sumSq(double[] values) {
- return sumSq.evaluate(values);
+ return sumSq(values, 0, values.length);
}
/**
@@ -153,7 +104,12 @@
* @return the sum of the squared values or Double.NaN if the array is empty
*/
public static double sumSq(double[] values, int begin, int length) {
- return sumSq.evaluate(values, begin, length);
+ testInput(values, begin, length);
+ double accum = 0.0;
+ for (int i = begin; i < begin + length; i++) {
+ accum += Math.pow(values[i], 2.0);
+ }
+ return accum;
}
/**
@@ -162,7 +118,7 @@
* @return the product values or Double.NaN if the array is empty
*/
public static double product(double[] values) {
- return product.evaluate(values);
+ return product(values, 0, values.length);
}
/**
@@ -173,7 +129,12 @@
* @return the product values or Double.NaN if the array is empty
*/
public static double product(double[] values, int begin, int length) {
- return product.evaluate(values, begin, length);
+ testInput(values, begin, length);
+ double product = 1.0;
+ for (int i = begin; i < begin + length; i++) {
+ product *= values[i];
+ }
+ return product;
}
/**
@@ -182,7 +143,7 @@
* @return the sumLog value or Double.NaN if the array is empty
*/
public static double sumLog(double[] values) {
- return sumLog.evaluate(values);
+ return sumLog(values, 0, values.length);
}
/**
@@ -193,7 +154,12 @@
* @return the sumLog value or Double.NaN if the array is empty
*/
public static double sumLog(double[] values, int begin, int length) {
- return sumLog.evaluate(values, begin, length);
+ testInput(values, begin, length);
+ double sumLog = 0.0;
+ for (int i = begin; i < begin + length; i++) {
+ sumLog += Math.log(values[i]);
+ }
+ return sumLog;
}
/**
@@ -203,7 +169,7 @@
* any of the values are <= 0.
*/
public static double geometricMean(double[] values) {
- return geoMean.evaluate(values);
+ return geometricMean(values, 0, values.length);
}
/**
@@ -214,11 +180,9 @@
* @return the geometric mean or Double.NaN if the array is empty or
* any of the values are <= 0.
*/
- public static double geometricMean(
- double[] values,
- int begin,
- int length) {
- return geoMean.evaluate(values, begin, length);
+ public static double geometricMean(double[] values, int begin, int length) {
+ testInput(values, begin, length);
+ return Math.exp(sumLog(values, begin, length) / (double) length );
}
/**
@@ -228,7 +192,7 @@
* @return the mean of the values or Double.NaN if the array is empty
*/
public static double mean(double[] values) {
- return mean.evaluate(values);
+ return sum(values) / (double) values.length;
}
/**
@@ -240,7 +204,8 @@
* @return the mean of the values or Double.NaN if the array is empty
*/
public static double mean(double[] values, int begin, int length) {
- return mean.evaluate(values, begin, length);
+ testInput(values, begin, length);
+ return sum(values, begin, length) / ((double) length);
}
/**
@@ -265,7 +230,7 @@
double[] values,
int begin,
int length) {
-
+ testInput(values, begin, length);
double stdDev = Double.NaN;
if (values.length != 0) {
stdDev = Math.sqrt(variance(values, begin, length));
@@ -306,7 +271,24 @@
* or 0.0 for a single value set.
*/
public static double variance(double[] values, int begin, int length) {
- return var.evaluate(values, begin, length);
+ testInput(values, begin, length);
+
+ double variance = Double.NaN;
+ if (values.length == 1) {
+ variance = 0;
+ } else if (values.length > 1) {
+ double mean = mean(values, begin, length);
+ double accum = 0.0;
+ double accum2 = 0.0;
+ for (int i = begin; i < begin + length; i++) {
+ accum += Math.pow((values[i] - mean), 2.0);
+ accum2 += (values[i] - mean);
+ }
+ variance =
+ (accum - (Math.pow(accum2, 2) / ((double)length)))
+ / (double) (length - 1);
+ }
+ return variance;
}
/**
@@ -318,16 +300,51 @@
public static double skewness(double[] values) {
return skewness(values, 0, values.length);
}
- /**
- * Returns the skewness of a collection of values. Skewness is a
- * measure of the assymetry of a given distribution.
- * @param values Is a double[] containing the values
- * @param begin processing at this point in the array
- * @param length processing at this point in the array
- * @return the skewness of the values or Double.NaN if the array is empty
- */
+ /**
+ * Returns the skewness of a collection of values. Skewness is a
+ * measure of the asymmetry of a given distribution.
+ * @param values Is a double[] containing the values
+ * @param begin processing at this point in the array
+ * @param length processing at this point in the array
+ * @return the skewness of the values or Double.NaN if the array is empty
+ */
public static double skewness(double[] values, int begin, int length) {
- return skew.evaluate(values, begin, length);
+
+ testInput(values, begin, length);
+
+ // Initialize the skewness
+ double skewness = Double.NaN;
+
+ // Get the mean and the standard deviation
+ double mean = mean(values, begin, length);
+
+ // Calc the std; this is implemented here instead of using the
+ // standardDeviation method to eliminate a duplicate pass to get the mean
+ double accum = 0.0;
+ double accum2 = 0.0;
+ for (int i = begin; i < begin + length; i++) {
+ accum += Math.pow((values[i] - mean), 2.0);
+ accum2 += (values[i] - mean);
+ }
+ double stdDev =
+ Math.sqrt(
+ (accum - (Math.pow(accum2, 2) / ((double) length)))
+ / (double) (length - 1));
+
+ // Calculate the skew as the sum of the cubes of the distance
+ // from the mean divided by the standard deviation.
+ double accum3 = 0.0;
+ for (int i = begin; i < begin + length; i++) {
+ accum3 += Math.pow((values[i] - mean) / stdDev, 3.0);
+ }
+
+ // Get N
+ double n = length;
+
+ // Calculate skewness
+ skewness = (n / ((n - 1) * (n - 2))) * accum3;
+
+ return skewness;
}
/**
@@ -339,7 +356,7 @@
public static double kurtosis(double[] values) {
return kurtosis(values, 0, values.length);
}
-
+
/**
* Returns the kurtosis for this collection of values. Kurtosis is a
* measure of the "peakedness" of a distribution.
@@ -349,9 +366,47 @@
* @return the kurtosis of the values or Double.NaN if the array is empty
*/
public static double kurtosis(double[] values, int begin, int length) {
- return kurt.evaluate(values, begin, length);
- }
+ testInput(values, begin, length);
+
+ // Initialize the kurtosis
+ double kurtosis = Double.NaN;
+
+ // Get the mean and the standard deviation
+ double mean = mean(values, begin, length);
+
+ // Calc the std; this is implemented here instead of using the
+ // standardDeviation method to eliminate a duplicate pass to get the mean
+ double accum = 0.0;
+ double accum2 = 0.0;
+ for (int i = begin; i < begin + length; i++) {
+ accum += Math.pow((values[i] - mean), 2.0);
+ accum2 += (values[i] - mean);
+ }
+
+ double stdDev =
+ Math.sqrt(
+ (accum - (Math.pow(accum2, 2) / ((double) length)))
+ / (double) (length - 1));
+
+ // Sum the ^4 of the distance from the mean divided by the
+ // standard deviation
+ double accum3 = 0.0;
+ for (int i = begin; i < begin + length; i++) {
+ accum3 += Math.pow((values[i] - mean) / stdDev, 4.0);
+ }
+ // Get N
+ double n = length;
+
+ double coefficientOne = (n * (n + 1)) / ((n - 1) * (n - 2) * (n - 3));
+ double termTwo = ((3 * Math.pow(n - 1, 2.0)) / ((n - 2) * (n - 3)));
+
+ // Calculate kurtosis
+ kurtosis = (coefficientOne * accum3) - termTwo;
+
+ return kurtosis;
+ }
+
/**
* Returns the maximum of the available values
* @param values Is a double[] containing the values
@@ -369,7 +424,16 @@
* @return the maximum of the values or Double.NaN if the array is empty
*/
public static double max(double[] values, int begin, int length) {
- return max.evaluate(values, begin, length);
+ testInput(values, begin, length);
+ double max = Double.NaN;
+ for (int i = begin; i < begin + length; i++) {
+ if (i == 0) {
+ max = values[i];
+ } else {
+ max = (max > values[i]) ? max : values[i];
+ }
+ }
+ return max;
}
/**
@@ -389,32 +453,36 @@
* @return the minimum of the values or Double.NaN if the array is empty
*/
public static double min(double[] values, int begin, int length) {
- return min.evaluate(values, begin, length);
- }
+ testInput(values, begin, length);
- /**
- * Returns the p'th percentile for a double[]
- * @param values Is a double[] containing the values
- * @param p is 0 <= p <= 100
- * @return the value at the p'th percentile
- */
- public static double percentile(double[] values, double p) {
- return percentile.evaluate(values, p);
+ double min = Double.NaN;
+ for (int i = begin; i < begin + length; i++) {
+ if (i == 0) {
+ min = values[i];
+ } else {
+ min = (min < values[i]) ? min : values[i];
+ }
+ }
+ return min;
}
/**
- * Returns the p'th percentile for a double[]
+ * Private testInput method used by all methods to verify that the
+ * array and the indices into it are valid.
* @param values Is a double[] containing the values
* @param begin processing at this point in the array
* @param length processing at this point in the array
- * @param p is 0 <= p <= 100
- * @return the value at the p'th percentile
*/
- public static double percentile(
- double[] values,
- int begin,
- int length,
- double p) {
- return percentile.evaluate(values, begin, length, p);
+ private static void testInput(double[] values, int begin, int length) {
+
+ if (length > values.length)
+ throw new IllegalArgumentException("length > values.length");
+
+ if (begin + length > values.length)
+ throw new IllegalArgumentException("begin + length > values.length");
+
+ if (values == null)
+ throw new IllegalArgumentException("input value array is null");
+
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]