Author: jeastman
Date: Fri Apr 22 04:34:06 2011
New Revision: 1095862
URL: http://svn.apache.org/viewvc?rev=1095862&view=rev
Log:
MAHOUT-479: Extended ClusterClassifier, ClusteringPolicy(s) and ClusterIterator
to support Fuzzy K-Means. Updated display examples to use the ClusterIterator
by default to illustrate these changes. Not quite as elegant as the earlier
version but still a productive direction for exploration.
All unit tests run
Added:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/FuzzyKMeansClusteringPolicy.java
(with props)
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/AbstractCluster.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterClassifier.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterIterator.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusteringPolicy.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/DirichletClusteringPolicy.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/KMeansClusteringPolicy.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/Model.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletCluster.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalModel.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/L1Model.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModel.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/SoftCluster.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayDirichlet.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/AbstractCluster.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/AbstractCluster.java?rev=1095862&r1=1095861&r2=1095862&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/AbstractCluster.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/AbstractCluster.java
Fri Apr 22 04:34:06 2011
@@ -34,26 +34,25 @@ import org.apache.mahout.math.VectorWrit
import org.apache.mahout.math.function.SquareRootFunction;
public abstract class AbstractCluster implements Cluster {
-
+
// cluster persistent state
private int id;
-
+
private long numPoints;
-
+
private Vector center;
-
+
private Vector radius;
-
- protected AbstractCluster() {
- }
-
+
+ protected AbstractCluster() {}
+
protected AbstractCluster(Vector point, int id2) {
this.numPoints = 0;
this.center = new RandomAccessSparseVector(point);
this.radius = point.like();
this.id = id2;
}
-
+
protected AbstractCluster(Vector center2, Vector radius2, int id2) {
this.numPoints = 0;
this.center = new RandomAccessSparseVector(center2);
@@ -75,63 +74,67 @@ public abstract class AbstractCluster im
public void createParameters(String prefix, Configuration jobConf) {
// nothing to do
}
-
+
/**
- * @param id the id to set
+ * @param id
+ * the id to set
*/
protected void setId(int id) {
this.id = id;
}
-
+
/**
- * @param l the numPoints to set
+ * @param l
+ * the numPoints to set
*/
protected void setNumPoints(long l) {
this.numPoints = l;
}
-
+
/**
- * @param center the center to set
+ * @param center
+ * the center to set
*/
protected void setCenter(Vector center) {
this.center = center;
}
-
+
/**
- * @param radius the radius to set
+ * @param radius
+ * the radius to set
*/
protected void setRadius(Vector radius) {
this.radius = radius;
}
-
+
// the observation statistics, initialized by the first observation
private double s0;
-
+
private Vector s1;
-
+
private Vector s2;
-
+
/**
* @return the s0
*/
protected double getS0() {
return s0;
}
-
+
/**
* @return the s1
*/
protected Vector getS1() {
return s1;
}
-
+
/**
* @return the s2
*/
protected Vector getS2() {
return s2;
}
-
+
public void observe(ClusterObservations observations) {
s0 += observations.getS0();
if (s1 == null) {
@@ -145,41 +148,61 @@ public abstract class AbstractCluster im
observations.getS2().addTo(s2);
}
}
-
+
@Override
public void observe(VectorWritable x) {
observe(x.get());
}
-
+
+ @Override
+ public void observe(VectorWritable x, double weight) {
+ observe(x.get(), weight);
+ }
+
public void observe(Vector x, double weight) {
- s0 += weight;
- Vector weightedX = x.times(weight);
+ if (weight == 1.0) {
+ observe(x);
+ } else {
+ s0 += weight;
+ Vector weightedX = x.times(weight);
+ if (s1 == null) {
+ s1 = weightedX;
+ } else {
+ weightedX.addTo(s1);
+ }
+ Vector x2 = x.times(x).times(weight);
+ if (s2 == null) {
+ s2 = x2;
+ } else {
+ x2.addTo(s2);
+ }
+ }
+ }
+
+ public void observe(Vector x) {
+ s0 += 1;
if (s1 == null) {
- s1 = weightedX;
+ s1 = x.clone();
} else {
- weightedX.addTo(s1);
+ x.addTo(s1);
}
- Vector x2 = x.times(x).times(weight);
+ Vector x2 = x.times(x);
if (s2 == null) {
s2 = x2;
} else {
x2.addTo(s2);
}
}
-
- public void observe(Vector x) {
- observe(x, 1.0);
- }
-
+
@Override
public long getNumPoints() {
return numPoints;
}
-
+
public ClusterObservations getObservations() {
return new ClusterObservations(s0, s1, s2);
}
-
+
@Override
public void computeParameters() {
if (s0 == 0) {
@@ -189,13 +212,14 @@ public abstract class AbstractCluster im
center = s1.divide(s0);
// compute the component stds
if (s0 > 1) {
- radius = s2.times(s0).minus(s1.times(s1)).assign(new
SquareRootFunction()).divide(s0);
- }
+ radius = s2.times(s0).minus(s1.times(s1))
+ .assign(new SquareRootFunction()).divide(s0);
+ }
s0 = 0;
s1 = null;
s2 = null;
}
-
+
@Override
public void readFields(DataInput in) throws IOException {
this.id = in.readInt();
@@ -206,7 +230,7 @@ public abstract class AbstractCluster im
temp.readFields(in);
this.radius = temp.get();
}
-
+
@Override
public void write(DataOutput out) throws IOException {
out.writeInt(id);
@@ -214,7 +238,7 @@ public abstract class AbstractCluster im
VectorWritable.writeVector(out, center);
VectorWritable.writeVector(out, radius);
}
-
+
@Override
public String asFormatString(String[] bindings) {
StringBuilder buf = new StringBuilder(50);
@@ -228,24 +252,24 @@ public abstract class AbstractCluster im
buf.append('}');
return buf.toString();
}
-
+
public abstract String getIdentifier();
-
+
@Override
public Vector getCenter() {
return center;
}
-
+
@Override
public int getId() {
return id;
}
-
+
@Override
public Vector getRadius() {
return radius;
}
-
+
/**
* Compute the centroid by averaging the pointTotals
*
@@ -254,10 +278,10 @@ public abstract class AbstractCluster im
public Vector computeCentroid() {
return s0 == 0 ? getCenter() : s1.divide(s0);
}
-
+
/**
- * Return a human-readable formatted string representation of the vector,
not intended to be complete nor
- * usable as an input/output representation
+ * Return a human-readable formatted string representation of the vector, not
+ * intended to be complete nor usable as an input/output representation
*/
public static String formatVector(Vector v, String[] bindings) {
StringBuilder buf = new StringBuilder();
@@ -299,7 +323,7 @@ public abstract class AbstractCluster im
buf.append(']');
return buf.toString();
}
-
+
@Override
public long count() {
return getNumPoints();
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterClassifier.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterClassifier.java?rev=1095862&r1=1095861&r2=1095862&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterClassifier.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterClassifier.java
Fri Apr 22 04:34:06 2011
@@ -134,6 +134,20 @@ public class ClusterClassifier extends A
getModels().get(actual).observe(new VectorWritable(instance));
}
+ /**
+ * Train the models given an additional weight. Unique to ClusterClassifier
+ *
+ * @param actual
+ * the int index of a model
+ * @param data
+ * a data Vector
+ * @param weight
+ * a double weighting factor
+ */
+ public void train(int actual, Vector data, double weight) {
+ getModels().get(actual).observe(new VectorWritable(data), weight);
+ }
+
public void train(long trackingKey, String groupKey, int actual,
Vector instance) {
getModels().get(actual).observe(new VectorWritable(instance));
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterIterator.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterIterator.java?rev=1095862&r1=1095861&r2=1095862&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterIterator.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterIterator.java
Fri Apr 22 04:34:06 2011
@@ -16,6 +16,7 @@
*/
package org.apache.mahout.clustering;
+import java.util.Iterator;
import java.util.List;
import org.apache.mahout.math.Vector;
@@ -43,28 +44,32 @@ public class ClusterIterator {
*
* @param data
* a List<Vector> of input vectors
- * @param prior
- * the prior-trained ClusterClassifier
+ * @param classifier
+ * a prior ClusterClassifier
* @param numIterations
* the int number of iterations to perform
* @return the posterior ClusterClassifier
*/
- public ClusterClassifier iterate(List<Vector> data, ClusterClassifier prior,
- int numIterations) {
+ public ClusterClassifier iterate(List<Vector> data,
+ ClusterClassifier classifier, int numIterations) {
for (int iteration = 1; iteration <= numIterations; iteration++) {
for (Vector vector : data) {
// classification yields probabilities
- Vector pdfs = prior.classify(vector);
- // policy selects a model given those probabilities
- int selected = policy.select(pdfs);
+ Vector probabilities = classifier.classify(vector);
+ // policy selects weights for models given those probabilities
+ Vector weights = policy.select(probabilities);
// training causes all models to observe data
- prior.train(selected, vector);
+ for (Iterator<Vector.Element> it = weights.iterateNonZero(); it
+ .hasNext();) {
+ int index = it.next().index();
+ classifier.train(index, vector, weights.get(index));
+ }
}
// compute the posterior models
- prior.close();
+ classifier.close();
// update the policy
- policy.update(prior);
+ policy.update(classifier);
}
- return prior;
+ return classifier;
}
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusteringPolicy.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusteringPolicy.java?rev=1095862&r1=1095861&r2=1095862&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusteringPolicy.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusteringPolicy.java
Fri Apr 22 04:34:06 2011
@@ -27,11 +27,11 @@ public interface ClusteringPolicy {
/**
* Return the index of the most appropriate model
*
- * @param pdfs
+ * @param probabilities
* a Vector of pdfs
- * @return an int index
+ * @return a Vector of weights
*/
- public abstract int select(Vector pdfs);
+ public abstract Vector select(Vector probabilities);
/**
* Update the policy with the given classifier
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/DirichletClusteringPolicy.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/DirichletClusteringPolicy.java?rev=1095862&r1=1095861&r2=1095862&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/DirichletClusteringPolicy.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/DirichletClusteringPolicy.java
Fri Apr 22 04:34:06 2011
@@ -18,6 +18,7 @@ package org.apache.mahout.clustering;
import org.apache.mahout.clustering.dirichlet.UncommonDistributions;
import org.apache.mahout.math.DenseVector;
+import org.apache.mahout.math.SequentialAccessSparseVector;
import org.apache.mahout.math.Vector;
public class DirichletClusteringPolicy implements ClusteringPolicy {
@@ -39,8 +40,11 @@ public class DirichletClusteringPolicy i
// Total observed over all time
private Vector totalCounts;
- public int select(Vector pdfs) {
- return UncommonDistributions.rMultinom(pdfs.times(mixture));
+ public Vector select(Vector probabilities) {
+ int rMultinom =
UncommonDistributions.rMultinom(probabilities.times(mixture));
+ Vector weights = new SequentialAccessSparseVector(probabilities.size());
+ weights.set(rMultinom, 1.0);
+ return weights;
}
// update the total counts and then the mixture
Added:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/FuzzyKMeansClusteringPolicy.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/FuzzyKMeansClusteringPolicy.java?rev=1095862&view=auto
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/FuzzyKMeansClusteringPolicy.java
(added)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/FuzzyKMeansClusteringPolicy.java
Fri Apr 22 04:34:06 2011
@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.mahout.clustering;
+
+import org.apache.mahout.math.Vector;
+
+/**
+ * This is a probability-weighted clustering policy, suitable for fuzzy k-means
+ * clustering
+ *
+ */
+public class FuzzyKMeansClusteringPolicy implements ClusteringPolicy {
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see
+ * org.apache.mahout.clustering.ClusteringPolicy#update(org.apache.mahout.
+ * clustering.ClusterClassifier)
+ */
+ @Override
+ public void update(ClusterClassifier posterior) {
+ // nothing to do here
+ }
+
+ /* (non-Javadoc)
+ * @see
org.apache.mahout.clustering.ClusteringPolicy#select(org.apache.mahout.math.Vector)
+ */
+ @Override
+ public Vector select(Vector probabilities) {
+ return probabilities;
+ }
+
+}
Propchange:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/FuzzyKMeansClusteringPolicy.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/KMeansClusteringPolicy.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/KMeansClusteringPolicy.java?rev=1095862&r1=1095861&r2=1095862&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/KMeansClusteringPolicy.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/KMeansClusteringPolicy.java
Fri Apr 22 04:34:06 2011
@@ -16,6 +16,7 @@
*/
package org.apache.mahout.clustering;
+import org.apache.mahout.math.SequentialAccessSparseVector;
import org.apache.mahout.math.Vector;
/**
@@ -29,8 +30,11 @@ public class KMeansClusteringPolicy impl
* @see
org.apache.mahout.clustering.ClusteringPolicy#select(org.apache.mahout.math.Vector)
*/
@Override
- public int select(Vector pdfs) {
- return pdfs.maxValueIndex();
+ public Vector select(Vector probabilities) {
+ int maxValueIndex = probabilities.maxValueIndex();
+ Vector weights = new SequentialAccessSparseVector(probabilities.size());
+ weights.set(maxValueIndex, 1.0);
+ return weights;
}
/* (non-Javadoc)
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/Model.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/Model.java?rev=1095862&r1=1095861&r2=1095862&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/Model.java
(original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/Model.java Fri
Apr 22 04:34:06 2011
@@ -21,8 +21,9 @@ import org.apache.hadoop.io.Writable;
import org.apache.mahout.math.VectorWritable;
/**
- * A model is a probability distribution over observed data points and allows
the probability of any data
- * point to be computed. All Models have a persistent representation and extend
+ * A model is a probability distribution over observed data points and allows
+ * the probability of any data point to be computed. All Models have a
+ * persistent representation and extend
* WritablesampleFromPosterior(Model<VectorWritable>[])
*/
public interface Model<O> extends Writable {
@@ -35,7 +36,7 @@ public interface Model<O> extends Writab
* @return the probability that x is in the receiver
*/
double pdf(O x);
-
+
/**
* Observe the given observation, retaining information about it
*
@@ -45,8 +46,18 @@ public interface Model<O> extends Writab
void observe(O x);
/**
- * Compute a new set of posterior parameters based upon the Observations
that have been observed since my
- * creation
+ * Observe the given observation, retaining information about it
+ *
+ * @param x
+ * an Observation from the posterior
+ * @param weight
+ * a double weighting factor
+ */
+ void observe(O x, double weight);
+
+ /**
+ * Compute a new set of posterior parameters based upon the Observations that
+ * have been observed since my creation
*/
void computeParameters();
@@ -56,10 +67,10 @@ public interface Model<O> extends Writab
* @return an int
*/
long count();
-
+
/**
* @return a sample of my posterior model
*/
Model<VectorWritable> sampleFromPosterior();
-
+
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletCluster.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletCluster.java?rev=1095862&r1=1095861&r2=1095862&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletCluster.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletCluster.java
Fri Apr 22 04:34:06 2011
@@ -163,4 +163,9 @@ public class DirichletCluster implements
return model.sampleFromPosterior();
}
+ @Override
+ public void observe(VectorWritable x, double weight) {
+ throw new UnsupportedOperationException();
+ }
+
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalModel.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalModel.java?rev=1095862&r1=1095861&r2=1095862&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalModel.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalModel.java
Fri Apr 22 04:34:06 2011
@@ -209,4 +209,9 @@ public class AsymmetricSampledNormalMode
public Vector getRadius() {
return getStdDev();
}
+
+ @Override
+ public void observe(VectorWritable x, double weight) {
+ throw new UnsupportedOperationException();
+ }
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/L1Model.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/L1Model.java?rev=1095862&r1=1095861&r2=1095862&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/L1Model.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/L1Model.java
Fri Apr 22 04:34:06 2011
@@ -150,4 +150,9 @@ public class L1Model implements Cluster
return null;
}
+ @Override
+ public void observe(VectorWritable x, double weight) {
+ throw new UnsupportedOperationException();
+ }
+
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModel.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModel.java?rev=1095862&r1=1095861&r2=1095862&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModel.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModel.java
Fri Apr 22 04:34:06 2011
@@ -209,4 +209,9 @@ public class NormalModel implements Clus
public Vector getRadius() {
return mean.like().assign(getStdDev());
}
+
+ @Override
+ public void observe(VectorWritable x, double weight) {
+ throw new UnsupportedOperationException();
+ }
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/SoftCluster.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/SoftCluster.java?rev=1095862&r1=1095861&r2=1095862&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/SoftCluster.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/SoftCluster.java
Fri Apr 22 04:34:06 2011
@@ -17,41 +17,44 @@
package org.apache.mahout.clustering.fuzzykmeans;
-import org.apache.commons.lang.NotImplementedException;
import org.apache.mahout.clustering.kmeans.Cluster;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
public class SoftCluster extends Cluster {
-
+
// For Writable
- public SoftCluster() {
- }
-
+ public SoftCluster() {}
+
/**
* Construct a new SoftCluster with the given point as its center
- *
- * @param center the center point
- * @param measure the DistanceMeasure
+ *
+ * @param center
+ * the center point
+ * @param measure
+ * the DistanceMeasure
*/
public SoftCluster(Vector center, int clusterId, DistanceMeasure measure) {
super(center, clusterId, measure);
}
-
+
@Override
public String asFormatString() {
- return this.getIdentifier() + ": " +
this.computeCentroid().asFormatString();
+ return this.getIdentifier() + ": "
+ + this.computeCentroid().asFormatString();
}
-
+
@Override
public String getIdentifier() {
return (isConverged() ? "SV-" : "SC-") + getId();
}
-
+
@Override
public double pdf(VectorWritable vw) {
- // SoftCluster pdf cannot be calculated out of context. See
FuzzyKMeansClusterer
- throw new NotImplementedException();
+ // SoftCluster pdf cannot be calculated out of context. See
+ // FuzzyKMeansClusterer
+ throw new UnsupportedOperationException(
+ "SoftCluster pdf cannot be calculated out of context. See
FuzzyKMeansClusterer");
}
}
Modified:
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayDirichlet.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayDirichlet.java?rev=1095862&r1=1095861&r2=1095862&view=diff
==============================================================================
---
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayDirichlet.java
(original)
+++
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayDirichlet.java
Fri Apr 22 04:34:06 2011
@@ -123,8 +123,8 @@ public class DisplayDirichlet extends Di
RandomUtils.useTestSeed();
generateSamples();
- int numIterations = 40;
- int numClusters = 40;
+ int numIterations = 20;
+ int numClusters = 10;
int alpha0 = 1;
int thin = 3;
int burnin = 5;
Modified:
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java?rev=1095862&r1=1095861&r2=1095862&view=diff
==============================================================================
---
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java
(original)
+++
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java
Fri Apr 22 04:34:06 2011
@@ -19,33 +19,44 @@ package org.apache.mahout.clustering.dis
import java.awt.Graphics;
import java.awt.Graphics2D;
+import java.util.ArrayList;
+import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
+import org.apache.mahout.clustering.Cluster;
+import org.apache.mahout.clustering.ClusterClassifier;
+import org.apache.mahout.clustering.ClusterIterator;
+import org.apache.mahout.clustering.ClusteringPolicy;
+import org.apache.mahout.clustering.FuzzyKMeansClusteringPolicy;
import org.apache.mahout.clustering.fuzzykmeans.FuzzyKMeansDriver;
+import org.apache.mahout.clustering.fuzzykmeans.SoftCluster;
import org.apache.mahout.clustering.kmeans.RandomSeedGenerator;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.common.distance.ManhattanDistanceMeasure;
+import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.VectorWritable;
class DisplayFuzzyKMeans extends DisplayClustering {
-
+
DisplayFuzzyKMeans() {
initialize();
- this.setTitle("Fuzzy k-Means Clusters (>" + (int) (significance * 100) +
"% of population)");
+ this.setTitle("Fuzzy k-Means Clusters (>" + (int) (significance * 100)
+ + "% of population)");
}
-
+
// Override the paint() method
@Override
public void paint(Graphics g) {
plotSampleData((Graphics2D) g);
plotClusters((Graphics2D) g);
}
-
+
public static void main(String[] args) throws Exception {
DistanceMeasure measure = new ManhattanDistanceMeasure();
-
+
Path samples = new Path("samples");
Path output = new Path("output");
Configuration conf = new Configuration();
@@ -53,58 +64,40 @@ class DisplayFuzzyKMeans extends Display
HadoopUtil.delete(conf, output);
RandomUtils.useTestSeed();
DisplayClustering.generateSamples();
- //boolean b = true;
- //if (b) {
- writeSampleData(samples);
- Path clusters = RandomSeedGenerator.buildRandom(conf, samples, new
Path(output, "clusters-0"), 3, measure);
- double threshold = 0.001;
- int numIterations = 10;
- int m = 3;
- FuzzyKMeansDriver.run(samples,
- clusters,
- output,
- measure,
- threshold,
- numIterations,
- m,
- true,
- true,
- threshold,
- true);
-
- loadClusters(output);
- //} else {
- // List<Vector> points = new ArrayList<Vector>();
- // for (VectorWritable sample : SAMPLE_DATA) {
- // points.add((Vector) sample.get());
- // }
- // int id = 0;
- // List<SoftCluster> initialClusters = new ArrayList<SoftCluster>();
- // int numClusters = 3;
- // for (Vector point : points) {
- // if (initialClusters.size() < Math.min(numClusters, points.size())) {
- // initialClusters.add(new SoftCluster(point, id++));
- // } else {
- // break;
- // }
- // }
- // List<List<SoftCluster>> results =
FuzzyKMeansClusterer.clusterPoints(points,
- //
initialClusters,
- //
measure,
- //
threshold,
- // m,
- //
numIterations);
- // for (List<SoftCluster> models : results) {
- // List<org.apache.mahout.clustering.Cluster> clusters = new
ArrayList<org.apache.mahout.clustering.Cluster>();
- // for (SoftCluster cluster : models) {
- // org.apache.mahout.clustering.Cluster cluster2 =
(org.apache.mahout.clustering.Cluster) cluster;
- // if (isSignificant(cluster2)) {
- // clusters.add(cluster2);
- // }
- // }
- // CLUSTERS.add(clusters);
- // }
- //}
+ boolean b = false;
+ if (b) {
+ writeSampleData(samples);
+ Path clusters = RandomSeedGenerator.buildRandom(conf, samples, new Path(
+ output, "clusters-0"), 3, measure);
+ double threshold = 0.001;
+ int numIterations = 10;
+ int m = 3;
+ FuzzyKMeansDriver.run(samples, clusters, output, measure, threshold,
+ numIterations, m, true, true, threshold, true);
+
+ loadClusters(output);
+ } else {
+ List<Vector> points = new ArrayList<Vector>();
+ for (VectorWritable sample : SAMPLE_DATA) {
+ points.add(sample.get());
+ }
+ List<Cluster> initialClusters = new ArrayList<Cluster>();
+ int id = 0;
+ int numClusters = 4;
+ for (Vector point : points) {
+ if (initialClusters.size() < Math.min(numClusters, points.size())) {
+ initialClusters.add(new SoftCluster(point, id++, measure));
+ } else {
+ break;
+ }
+ }
+
+ ClusterClassifier prior = new ClusterClassifier(initialClusters);
+ ClusteringPolicy policy = new FuzzyKMeansClusteringPolicy();
+ ClusterClassifier posterior = new ClusterIterator(policy).iterate(points,
+ prior, 10);
+ CLUSTERS.add(posterior.getModels());
+ }
new DisplayFuzzyKMeans();
}
}
Modified:
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java?rev=1095862&r1=1095861&r2=1095862&view=diff
==============================================================================
---
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java
(original)
+++
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java
Fri Apr 22 04:34:06 2011
@@ -75,7 +75,7 @@ class DisplayKMeans extends DisplayClust
}
List<Cluster> initialClusters = new ArrayList<Cluster>();
int id = 0;
- int numClusters = 3;
+ int numClusters = 4;
for (Vector point : points) {
if (initialClusters.size() < Math.min(numClusters, points.size())) {
initialClusters.add(new org.apache.mahout.clustering.kmeans.Cluster(