Author: [email protected]
Date: Mon Mar 28 09:24:28 2011
New Revision: 899

Log:
[AMDATU-332] Added Wald Wolfowitz test for detecting correlation

Modified:
   trunk/etc/performancetest/pom.xml
   
trunk/etc/performancetest/src/main/java/org/amdatu/test/performance/analysis/ReportSummary.java
   
trunk/etc/performancetest/src/main/java/org/amdatu/test/performance/analysis/ZSamples.java
   trunk/etc/performancetest/src/main/resources/report_template.html

Modified: trunk/etc/performancetest/pom.xml
==============================================================================
--- trunk/etc/performancetest/pom.xml   (original)
+++ trunk/etc/performancetest/pom.xml   Mon Mar 28 09:24:28 2011
@@ -19,9 +19,9 @@
       <scope>compile</scope>
     </dependency>
     <dependency>
-      <groupId>commons-math</groupId>
+      <groupId>org.apache.commons</groupId>
       <artifactId>commons-math</artifactId>
-      <version>1.2</version>
+      <version>2.2</version>
       <scope>compile</scope>
     </dependency>
   </dependencies>

Modified: 
trunk/etc/performancetest/src/main/java/org/amdatu/test/performance/analysis/ReportSummary.java
==============================================================================
--- 
trunk/etc/performancetest/src/main/java/org/amdatu/test/performance/analysis/ReportSummary.java
     (original)
+++ 
trunk/etc/performancetest/src/main/java/org/amdatu/test/performance/analysis/ReportSummary.java
     Mon Mar 28 09:24:28 2011
@@ -148,13 +148,81 @@
                 table.append("<td>" + round(sample.sampleMeanX) + "</td>");
             }
             table.append("</tr>");
+            
+            // Minimum of X
+            table.append("<tr><td>Min<sub>x</sub></td>");
+            for (ZSamples sample : report.getSamples()) {
+                table.append("<td>" + round(sample.minX) + "</td>");
+            }
+            table.append("</tr>");
+            
+            // Maximum of X
+            table.append("<tr><td>Max<sub>x</sub></td>");
+            for (ZSamples sample : report.getSamples()) {
+                table.append("<td>" + round(sample.maxX) + "</td>");
+            }
+            table.append("</tr>");   
+            
+            // Median of X
+            table.append("<tr><td>Median<sub>x</sub></td>");
+            for (ZSamples sample : report.getSamples()) {
+                table.append("<td>" + round(sample.medianX) + "</td>");
+            }
+            table.append("</tr>");
+            
+            // Wald-Wolfowitz probability of X
+            table.append("<tr><td>Wald-Wolfowitz<sub>x</sub></td>");
+            for (ZSamples sample : report.getSamples()) {
+                if (sample.waldWolfowitzX < 0.001) {
+                    table.append(reallyBad(round(100*sample.waldWolfowitzX) + 
"%"));
+                } else if (sample.waldWolfowitzX < 0.01) {
+                    table.append(bad(round(100*sample.waldWolfowitzX) + "%"));
+                } else {
+                    table.append("<td>"+ round(100*sample.waldWolfowitzX) + 
"%</td>");
+                }
+            }
+            table.append("</tr>"); 
 
             // Mean of Y
-            table.append("<tr><td>n</td>");
+            table.append("<tr><td>M<sub>y</sub></td>");
             for (ZSamples sample : report.getSamples()) {
                 table.append("<td>" + round(sample.sampleMeanY) + "</td>");
             }
             table.append("</tr>");
+            
+            // Minimum of Y
+            table.append("<tr><td>Min<sub>y</sub></td>");
+            for (ZSamples sample : report.getSamples()) {
+                table.append("<td>" + round(sample.minY) + "</td>");
+            }
+            table.append("</tr>");
+            
+            // Maximum of Y
+            table.append("<tr><td>Max<sub>y</sub></td>");
+            for (ZSamples sample : report.getSamples()) {
+                table.append("<td>" + round(sample.maxY) + "</td>");
+            }
+            table.append("</tr>");
+            
+            // Median of Y
+            table.append("<tr><td>Median<sub>y</sub></td>");
+            for (ZSamples sample : report.getSamples()) {
+                table.append("<td>" + round(sample.medianY) + "</td>");
+            }
+            table.append("</tr>");
+            
+            // Wald-Wolfowitz probability of Y
+            table.append("<tr><td>Wald-Wolfowitz<sub>y</sub></td>");
+            for (ZSamples sample : report.getSamples()) {
+                if (sample.waldWolfowitzY < 0.001) {
+                    table.append(reallyBad(round(100*sample.waldWolfowitzY) + 
"%"));
+                } else if (sample.waldWolfowitzY < 0.01) {
+                    table.append(bad(round(100*sample.waldWolfowitzY) + "%"));
+                } else {
+                    table.append("<td>"+ round(100*sample.waldWolfowitzY) + 
"%</td>");
+                }
+            }
+            table.append("</tr>");          
 
             // Mean of Z
             table.append("<tr><td>M<sub>z</sub></td>");
@@ -169,6 +237,26 @@
                 table.append("<td>" + round(sample.sampleSD) + "</td>");
             }
             table.append("</tr>");
+            
+            // Median of Z
+            table.append("<tr><td>Median<sub>z</sub></td>");
+            for (ZSamples sample : report.getSamples()) {
+                table.append("<td>" + round(sample.medianZ) + "</td>");
+            }
+            table.append("</tr>");
+            
+            // Wald-Wolfowitz probability of Z
+            table.append("<tr><td>Wald-Wolfowitz<sub>z</sub></td>");
+            for (ZSamples sample : report.getSamples()) {
+                if (sample.waldWolfowitzZ < 0.001) {
+                    table.append(reallyBad(round(100*sample.waldWolfowitzZ) + 
"%"));
+                } else if (sample.waldWolfowitzZ < 0.01) {
+                    table.append(bad(round(100*sample.waldWolfowitzZ) + "%"));
+                } else {
+                    table.append("<td>"+ round(100*sample.waldWolfowitzZ) + 
"%</td>");
+                }
+            }
+            table.append("</tr>");              
 
             // t-value of Z
             table.append("<tr><td>t<sub>z</sub></td>");

Modified: 
trunk/etc/performancetest/src/main/java/org/amdatu/test/performance/analysis/ZSamples.java
==============================================================================
--- 
trunk/etc/performancetest/src/main/java/org/amdatu/test/performance/analysis/ZSamples.java
  (original)
+++ 
trunk/etc/performancetest/src/main/java/org/amdatu/test/performance/analysis/ZSamples.java
  Mon Mar 28 09:24:28 2011
@@ -25,7 +25,9 @@
 
 import org.amdatu.test.performance.runtest.ApplicationContext;
 import org.apache.commons.math.MathException;
+import org.apache.commons.math.distribution.NormalDistributionImpl;
 import org.apache.commons.math.distribution.TDistributionImpl;
+import org.apache.commons.math.stat.descriptive.rank.Median;
 
 /**
  * This class holds the samples of the sample observations (Z0=X0-Y0), 
(Z1=X1-Y1), ..., (Zn=Xn-Yn)
@@ -35,12 +37,22 @@
 public class ZSamples {
     private List<Double> m_samplesX;
     private List<Double> m_samplesY;
-    
+
     String name;
     public boolean H0; // H0 := mean(X) = mean(Y)
     double power = 0.9999;
     double successRateX;
     double successRateY;
+    double minX;
+    double minY;
+    double maxX;
+    double maxY;
+    double medianX;
+    double medianY;
+    double medianZ;
+    double waldWolfowitzX = 0;
+    double waldWolfowitzY = 0;
+    double waldWolfowitzZ = 0;
     double sampleMeanX;
     double sampleMeanY;
     double sampleMean;
@@ -108,7 +120,7 @@
         delta = D/sampleMeanY;
     }
 
-    private List<Double> getZ() throws IOException {
+    private List<Double> getZ() throws IOException, MathException {
         List<Double> results = new ArrayList<Double>();
 
         String fileName = name + m_context.getSamplesPostFix() + ".Z";
@@ -141,15 +153,23 @@
             }
         }
 
+        double[] z = new double[results.size()];
+        for (int i=0; i<results.size(); i++) {
+            z[i] = results.get(i);
+        }
+        
+        medianZ = new Median().evaluate(z);
+        waldWolfowitzZ = getWaldWolfowitzProbability(z);
+        
         return results;
     }
-    
+
     /**
      * Computes samples/1000 rounded to the nearest whole number, capped at 150, stores the result in the
      * sampleMeanSize field and returns it.
      *
      * @param samples value the size is derived from (presumably the number of raw observations - confirm at caller)
      * @return the value that was just stored in sampleMeanSize
      */
     private int getSampleSize(double samples) {
         sampleMeanSize = (int) Math.min(150, Math.round(samples/1000));
         return sampleMeanSize;
     }
 
-    private void preProcess(List<XYSample> x, List<XYSample> y) {
+    private void preProcess(List<XYSample> x, List<XYSample> y) throws 
MathException {
         if (x.size() != y.size()) {
             System.err.println("Mismatch in sample sizes: X=" + x.size() + ", 
Y=" + y.size());
         }
@@ -163,7 +183,20 @@
         successRateY = 0;
         double meanX = 0, meanY = 0;
         int count = 0;
-        for (int i=0; i<Math.min(x.size(), y.size()); i++) {
+        minX = x.get(0).responseTime;
+        maxX = minX;
+        minY = y.get(0).responseTime;
+        maxY = minY;
+        int n = Math.min(x.size(), y.size());
+
+        double[] xValues = new double[n];
+        double[] yValues = new double[n];
+        double[] zValues = new double[n];
+        for (int i=0; i<n; i++) {
+            xValues[i] = x.get(i).responseTime;
+            yValues[i] = y.get(i).responseTime;
+            zValues[i] = xValues[i] - yValues[i];
+
             if (count > 0 && count == newSampleSize) {
                 m_samplesX.add(meanX / newSampleSize);
                 m_samplesY.add(meanY / newSampleSize);
@@ -174,10 +207,14 @@
             meanX += x.get(i).responseTime;
             meanY += y.get(i).responseTime;
             count++;
-            
+
             // Calculate success rate
             successRateX += x.get(i).success ? 1 : 0;
             successRateY += y.get(i).success ? 1 : 0;
+            minX = Math.min(minX, x.get(i).responseTime);
+            maxX = Math.max(maxX, x.get(i).responseTime);
+            minY = Math.min(minY, y.get(i).responseTime);
+            maxY = Math.max(maxY, y.get(i).responseTime);
         }
         if (count > 0 && count == newSampleSize) {
             m_samplesX.add(meanX / newSampleSize);
@@ -186,8 +223,53 @@
             meanX = 0;
             meanY = 0;
         }
+
+        successRateX /= n;
+        successRateY /= n;   
+
+        // Under the assumption that the distribution of X doesn't shift, the number of runs of values above and
+        // below the median is approximately normally distributed; getWaldWolfowitzProbability tests for that.
+        medianX = new Median().evaluate(xValues);
+        waldWolfowitzX = getWaldWolfowitzProbability(xValues);
+
+        medianY = new Median().evaluate(yValues);
+        waldWolfowitzY = getWaldWolfowitzProbability(yValues);
+    }
     
-        successRateX /= Math.min(x.size(), y.size());
-        successRateY /= Math.min(x.size(), y.size());        
+    /**
+     * Performs a Wald-Wolfowitz runs test around the median of the given values.
+     * Every value is classified as below (-) or above (+) the median; values equal to the median are
+     * ignored. The number of runs R (maximal stretches of consecutive equal signs) is then compared
+     * with its normal approximation under the null hypothesis of randomness.
+     *
+     * @param values the observations, in the order in which they were measured
+     * @return the one-sided tail probability of the observed number of runs under the null hypothesis
+     *         (lower tail when R is below its expected value, upper tail otherwise), or 1 when the
+     *         normal approximation is undefined because its variance is zero or not a number
+     * @throws MathException if the normal distribution's cumulative probability cannot be computed
+     */
+    private double getWaldWolfowitzProbability(double[] values) throws MathException {
+        double median = new Median().evaluate(values);
+        double Nplus = 0, Nmin = 0;
+        double runs = 0;
+        int previousSign = 0; // 0 = no non-tied value seen yet
+        for (int i=0; i<values.length; i++) {
+            int sign;
+            if (values[i] < median) {
+                sign = -1;
+                Nmin++;
+            } else if (values[i] > median) {
+                sign = +1;
+                Nplus++;
+            } else {
+                // Ties with the median carry no sign and are left out of the test
+                continue;
+            }
+            // The first signed value opens the first run, every later sign change opens a new one.
+            // Counting only the sign changes would yield R-1, which does not match meanR below.
+            if (sign != previousSign) {
+                runs++;
+            }
+            previousSign = sign;
+        }
+
+        // Now R should be normally distributed under the null hypothesis of randomness
+        // with mean and variance:
+        double N = Nmin + Nplus;
+        double meanR = (2*Nmin*Nplus)/N+1;
+        double varR = ((meanR-1)*(meanR-2))/(N-1);
+
+        // With no signed values, only one sign present, or too few values, the variance is zero or NaN
+        // and no normal distribution can be built from it; report "no evidence against randomness".
+        if (!(varR > 0)) {
+            return 1;
+        }
+
+        // Now calculate the probability of our outcome of runs under the null hypothesis
+        double cumulative = new NormalDistributionImpl(meanR, Math.sqrt(varR)).cumulativeProbability(runs);
+        if (runs < meanR) {
+            return cumulative;
+        } else {
+            return 1-cumulative;
+        }
     }
 }

Modified: trunk/etc/performancetest/src/main/resources/report_template.html
==============================================================================
--- trunk/etc/performancetest/src/main/resources/report_template.html   
(original)
+++ trunk/etc/performancetest/src/main/resources/report_template.html   Mon Mar 
28 09:24:28 2011
@@ -115,18 +115,59 @@
                                        <td>The average of the samples 
M<sub>x</sub>(m)(1),...,M<sub>x</sub>(m)(n)</td>
                                </tr>
                                <tr>
+                                       <td>Min<sub>x</sub></td>
+                                       <td>The minimum of the samples 
X<sub>1</sub>,...,X<sub>n</sub></td>
+                               </tr>
+                               <tr>
+                                       <td>Max<sub>x</sub></td>
+                                       <td>The maximum of the samples 
X<sub>1</sub>,...,X<sub>n</sub></td>
+                               </tr>
+                               <tr>
+                                       <td>Median<sub>x</sub></td>
+                                       <td>The median of the samples  
X<sub>1</sub>,...,X<sub>n</sub></td>
+                               </tr>
+                               <tr>
+                                       <td>Wald-Wolfowitz<sub>x</sub></td>
+                                       <td>Outcome of the Wald-Wolfowitz runs test on samples from X; one-sided probability of observing this number of runs if the samples drawn from X are independent (small values indicate correlation)</td>
+                               </tr>
+                               <tr>
                                        <td>M<sub>y</sub></td>
                                        <td>The average of the samples 
M<sub>y</sub(m)(1),...,M<sub>y</sub(m)(n)</td>
                                </tr>
                                <tr>
+                                       <td>Min<sub>y</sub></td>
+                                       <td>The minimum of the samples 
Y<sub>1</sub>,...,Y<sub>n</sub></td>
+                               </tr>
+                               <tr>
+                                       <td>Max<sub>y</sub></td>
+                                       <td>The maximum of the samples 
Y<sub>1</sub>,...,Y<sub>n</sub></td>
+                               </tr>
+                               <tr>
+                                       <td>Median<sub>y</sub></td>
+                                       <td>The median of the samples 
Y<sub>1</sub>,...,Y<sub>n</sub></td>
+                               </tr>
+                               <tr>
+                                       <td>Wald-Wolfowitz<sub>y</sub></td>
+                                       <td>Outcome of the Wald-Wolfowitz runs test on samples from Y; one-sided probability of observing this number of runs if the samples drawn from Y are independent (small values indicate correlation)</td>
+                               </tr>
+                               <tr>
                                        <td>M<sub>z</sub></td>
                                        <td>The average of the samples 
Z<sub>1</sub(m),...,Z<sub>n</sub(m)</td></tr>
                                <tr>
                                        <td>S<sub>z</sub></td>
-                                       <td>The standard deviation of the 
samples Z<sub>1</sub(m),...,Z<sub>n</sub(m)</td></tr>
+                                       <td>The standard deviation of the samples Z<sub>1</sub>(m),...,Z<sub>n</sub>(m)</td>
+                               </tr>
+                               <tr>
+                                       <td>Median<sub>z</sub></td>
+                                       <td>The median of the samples  
Z<sub>1</sub>,...,Z<sub>n</sub></td>
+                               </tr>
+                               <tr>
+                                       <td>Wald-Wolfowitz<sub>z</sub></td>
+                                       <td>Outcome of the Wald-Wolfowitz runs test on samples from Z; one-sided probability of observing this number of runs if the samples drawn from Z are independent (small values indicate correlation)</td>
+                               </tr>
                                <tr>
                                        <td>t<sub>z</sub></td>
-                                       <td>The value of T, calculated from the 
samples 
Z<sub>1</sub(m),...,Z<sub>n</sub(m)Z<sub>1</sub(m),...,Z<sub>n</sub(m)</td>
+                                       <td>The value of T, calculated from the samples Z<sub>1</sub>(m),...,Z<sub>n</sub>(m)</td>
                                </tr>
                                <tr>
                                        <td>Pt<sub>z</sub></td>
_______________________________________________
Amdatu-commits mailing list
[email protected]
http://lists.amdatu.org/mailman/listinfo/amdatu-commits

Reply via email to