Author: jeastman
Date: Thu Dec 22 20:18:34 2011
New Revision: 1222420

URL: http://svn.apache.org/viewvc?rev=1222420&view=rev
Log:
MAHOUT-846: Refactored pdf() to sum the exponents and perform the 
exponentiation only once. All tests run

Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/UncommonDistributions.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/GaussianCluster.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/UncommonDistributions.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/UncommonDistributions.java?rev=1222420&r1=1222419&r2=1222420&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/UncommonDistributions.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/UncommonDistributions.java Thu Dec 22 20:18:34 2011
@@ -26,7 +26,7 @@ import org.uncommons.maths.random.Gaussi
 
 public final class UncommonDistributions {
   
-  private static final double SQRT2PI = Math.sqrt(2.0 * Math.PI);
+  public static final double SQRT2PI = Math.sqrt(2.0 * Math.PI);
   
   private static final Random RANDOM = RandomUtils.getRandom();
   

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/GaussianCluster.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/GaussianCluster.java?rev=1222420&r1=1222419&r2=1222420&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/GaussianCluster.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/GaussianCluster.java Thu Dec 22 20:18:34 2011
@@ -17,10 +17,13 @@
 
 package org.apache.mahout.clustering.dirichlet.models;
 
+import java.util.Iterator;
+
 import org.apache.mahout.clustering.AbstractCluster;
 import org.apache.mahout.clustering.Model;
 import org.apache.mahout.clustering.dirichlet.UncommonDistributions;
 import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.Vector.Element;
 import org.apache.mahout.math.VectorWritable;
 
 public class GaussianCluster extends AbstractCluster {
@@ -48,15 +51,29 @@ public class GaussianCluster extends Abs
   @Override
   public double pdf(VectorWritable vw) {
     Vector x = vw.get();
-    // return the product of the component pdfs
-    // TODO: is this reasonable? correct? It seems to work in some cases.
-    double pdf = 1;
-    for (int i = 0; i < x.size(); i++) {
-      // small prior on stdDev to avoid numeric instability when stdDev==0
-      pdf *= UncommonDistributions.dNorm(x.getQuick(i),
-          getCenter().getQuick(i), getRadius().getQuick(i) + 0.000001);
+    Vector m = getCenter();
+    Vector s = getRadius().plus(0.0000001); // add a small prior to avoid divide
+                                            // by zero
+    return Math.exp(-(divideSquareAndSum(x.minus(m), s) / 2))
+        / zProd(s.times(UncommonDistributions.SQRT2PI));
+  }
+  
+  private double zProd(Vector s) {
+    double prod = 1;
+    for (int i = 0; i < s.size(); i++) {
+      prod *= s.getQuick(i);
+    }
+    return prod;
+  }
+  
+  private double divideSquareAndSum(Vector numerator, Vector denominator) {
+    double result = 0;
+    for (Iterator<Element> it = denominator.iterateNonZero(); it.hasNext();) {
+      Element denom = it.next();
+      double quotient = numerator.getQuick(denom.index()) / denom.get();
+      result += quotient * quotient;
     }
-    return pdf;
+    return result;
   }
   
 }


Reply via email to