I'm having a weird problem when using the Commons Math package. When I compute
statistics with Commons Math and then compare the results to Excel, I get a
different standard deviation and median, but min, max, and count are the
same. I'd appreciate any ideas on how Commons Math and Excel differ in
these calculations.
MEDIAN: Excel: 468,231 CommonsMath: 485,711
STD: Excel: 11,861 CommonsMath: 10,678
The data set is 18,000 integers, so I won't include it here. They are mostly
six-digit numbers. Here's the code:
import gnu.trove.TDoubleArrayList;
import gnu.trove.TDoubleHashSet;
import java.sql.ResultSet;
import java.sql.SQLException;
import org.apache.commons.math.stat.descriptive.moment.StandardDeviation;
import org.apache.commons.math.stat.descriptive.rank.Max;
import org.apache.commons.math.stat.descriptive.rank.Median;
import org.apache.commons.math.stat.descriptive.rank.Min;
public class ExampleForMailingList {
StandardDeviation std = new StandardDeviation( );
Min min = new Min( );
Max max = new Max( );
Median medianInstance = new Median();
private double minimum = 0;
private double maximum = 0;
private double standardDev = 0;
private double median = 0;
private boolean isCalcDone = false;
private double count = 0;
/**
* <code>data</code> If the length is zero, then only 0 measurements
were added.
*/
TDoubleHashSet data = new TDoubleHashSet( );
/**
* If the <code>measurement</code> is greater than 0, then add it to the
data.
*
* @param measurement
*/
public void addMeasurement( int measurement ) {
data.add( measurement );
count++;
}
/**
* Must be called before using the getters. This method calculates the
statistics.
*/
public void calculate() {
try {
double[] dataArray = data.toArray( );
minimum = min.evaluate( dataArray );
maximum = max.evaluate( dataArray );
standardDev = std.evaluate( dataArray );
median = medianInstance.evaluate(dataArray);
isCalcDone = true;
} catch ( RuntimeException e ) {
// TODO Auto-generated catch block
e.printStackTrace( );
}
} // calculate
public double getMinimum() throws CalcNotDoneException {
return minimum;
} // get minimum
public double getMaximum() throws CalcNotDoneException {
return maximum;
} // get maximum
public double getStd() throws CalcNotDoneException {
return standardDev;
} // get std
public double getMedian() throws CalcNotDoneException {
return median;
} // get median
/**
* Converts a result set into a set of statistics which a table model
consumes. Calculates: <br>
* 1. min <br>
* 2. average <br>
* 3. max <br>
* 4. median<br>
* 5. percent threshold violations <br>
* @param resultSetArg
* Results of an order table query
*/
public void processResults( ResultSet results,String column ) {
int value = Integer.MAX_VALUE;
try {
while ( results.next( ) ) {
value = ( int ) results.getLong( column );
if ( value > -1 ) {
addMeasurement( value );
}
}
} catch ( SQLException e ) {
// TODO Auto-generated catch block
e.printStackTrace();
} // while
} // processResults
public static void main( String[] args ) {
ExampleForMailingList example = new ExampleForMailingList();
example.processResults(ResultSet set,"columnA");
example.calculate( );
System.out.println("std: "+ example.getStd( ));
System.out.println("std: "+ example.getMedian( ));
}
}
Thanks!