Author: jeastman
Date: Thu Dec 22 20:18:34 2011
New Revision: 1222420
URL: http://svn.apache.org/viewvc?rev=1222420&view=rev
Log:
MAHOUT-846: Refactored pdf() to sum the exponents and perform the
exponentiation only once. All tests run
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/UncommonDistributions.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/GaussianCluster.java
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/UncommonDistributions.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/UncommonDistributions.java?rev=1222420&r1=1222419&r2=1222420&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/UncommonDistributions.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/UncommonDistributions.java Thu Dec 22 20:18:34 2011
@@ -26,7 +26,7 @@ import org.uncommons.maths.random.Gaussi
public final class UncommonDistributions {
- private static final double SQRT2PI = Math.sqrt(2.0 * Math.PI);
+ public static final double SQRT2PI = Math.sqrt(2.0 * Math.PI);
private static final Random RANDOM = RandomUtils.getRandom();
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/GaussianCluster.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/GaussianCluster.java?rev=1222420&r1=1222419&r2=1222420&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/GaussianCluster.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/GaussianCluster.java Thu Dec 22 20:18:34 2011
@@ -17,10 +17,13 @@
package org.apache.mahout.clustering.dirichlet.models;
+import java.util.Iterator;
+
import org.apache.mahout.clustering.AbstractCluster;
import org.apache.mahout.clustering.Model;
import org.apache.mahout.clustering.dirichlet.UncommonDistributions;
import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.Vector.Element;
import org.apache.mahout.math.VectorWritable;
public class GaussianCluster extends AbstractCluster {
@@ -48,15 +51,29 @@ public class GaussianCluster extends Abs
@Override
public double pdf(VectorWritable vw) {
Vector x = vw.get();
- // return the product of the component pdfs
- // TODO: is this reasonable? correct? It seems to work in some cases.
- double pdf = 1;
- for (int i = 0; i < x.size(); i++) {
- // small prior on stdDev to avoid numeric instability when stdDev==0
- pdf *= UncommonDistributions.dNorm(x.getQuick(i),
- getCenter().getQuick(i), getRadius().getQuick(i) + 0.000001);
+ Vector m = getCenter();
+ Vector s = getRadius().plus(0.0000001); // add a small prior to avoid divide
+ // by zero
+ return Math.exp(-(divideSquareAndSum(x.minus(m), s) / 2))
+ / zProd(s.times(UncommonDistributions.SQRT2PI));
+ }
+
+ private double zProd(Vector s) {
+ double prod = 1;
+ for (int i = 0; i < s.size(); i++) {
+ prod *= s.getQuick(i);
+ }
+ return prod;
+ }
+
+ private double divideSquareAndSum(Vector numerator, Vector denominator) {
+ double result = 0;
+ for (Iterator<Element> it = denominator.iterateNonZero(); it.hasNext();) {
+ Element denom = it.next();
+ double quotient = numerator.getQuick(denom.index()) / denom.get();
+ result += quotient * quotient;
}
- return pdf;
+ return result;
}
}