I'm having a weird problem when using the Commons Math package.  When I
compute statistics with Commons Math and compare the results to Excel, I get
a different standard deviation and median, but min, max, and count are the
same.  I'd appreciate any ideas on how Commons Math and Excel differ in
these calculations.

MEDIAN:  Excel:  468,231   CommonsMath:  485,711
STD:        Excel:    11,861   CommonsMath:    10,678

The data set is 18,000 integers, so I won't include them.  They are mostly
6-digit numbers.  Here's the code:

import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;

import gnu.trove.TDoubleHashSet;
import org.apache.commons.math.stat.descriptive.moment.StandardDeviation;
import org.apache.commons.math.stat.descriptive.rank.Max;
import org.apache.commons.math.stat.descriptive.rank.Median;
import org.apache.commons.math.stat.descriptive.rank.Min;

public class ExampleForMailingList {

   StandardDeviation std                    = new StandardDeviation( );

   Min               min                    = new Min( );

   Max               max                    = new Max( );

   Median            medianInstance               = new Median();

   private double    minimum                = 0;

   private double    maximum                = 0;

   private double    standardDev            = 0;

   private double median = 0;

   private boolean   isCalcDone             = false;

   private double   count                  = 0;

   /**
    * <code>data</code> If the length is zero, then only 0 measurements
were added.
    */
   TDoubleHashSet    data                   = new TDoubleHashSet( );

   /**
    * If the <code>measurement</code> is greater than 0, then add it to the
data.
    *
    * @param measurement
    */
   public void addMeasurement( int measurement ) {

           data.add( measurement );

           count++;
   }

   /**
    * Must be called before using the getters.  This method calculates the
statistics.
    */
   public void calculate() {

       try {
           double[] dataArray = data.toArray( );

           minimum = min.evaluate( dataArray );

           maximum = max.evaluate( dataArray );

           standardDev = std.evaluate( dataArray );

           median = medianInstance.evaluate(dataArray);

           isCalcDone = true;

       } catch ( RuntimeException e ) {
           // TODO Auto-generated catch block
           e.printStackTrace( );
       }
   } // calculate

   public double getMinimum() throws CalcNotDoneException {
       return minimum;
   } // get minimum

   public double getMaximum() throws CalcNotDoneException {
          return maximum;
   } // get maximum

   public double getStd() throws CalcNotDoneException {
        return standardDev;
   } // get std

   public double getMedian() throws CalcNotDoneException {
return median;
   } // get median

   /**
    * Converts a result set into a set of statistics which a table model
consumes. Calculates: <br>
    * 1. min <br>
    * 2. average <br>
    * 3. max <br>
    * 4. median<br>
    * 5. percent threshold violations <br>

    * @param resultSetArg
    *            Results of an order table query
   */
   public void processResults( ResultSet results,String column ) {

       int value = Integer.MAX_VALUE;

          try {
           while ( results.next( ) ) {

                     value = ( int ) results.getLong( column );

                       if ( value > -1 ) {
                           addMeasurement( value );
                       }
               }
       } catch ( SQLException e ) {
           // TODO Auto-generated catch block
           e.printStackTrace();
       } // while
} // processResults

   public static void main( String[] args ) {
       ExampleForMailingList example = new ExampleForMailingList();
       example.processResults(ResultSet set,"columnA");
       example.calculate( );

       System.out.println("std: "+ example.getStd( ));
       System.out.println("std: "+ example.getMedian( ));
   }
}

Thanks!

Reply via email to