Author: jeastman
Date: Fri Apr 22 04:34:06 2011
New Revision: 1095862

URL: http://svn.apache.org/viewvc?rev=1095862&view=rev
Log:
MAHOUT-479: Extended ClusterClassifier, the ClusteringPolicy implementations, 
and ClusterIterator to support Fuzzy K-Means. Updated the display examples to 
use the ClusterIterator by default to illustrate these changes. Not quite as 
elegant as the earlier version, but still a productive direction for exploration.
All unit tests run.

Added:
    
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/FuzzyKMeansClusteringPolicy.java
   (with props)
Modified:
    
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/AbstractCluster.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterClassifier.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterIterator.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusteringPolicy.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/DirichletClusteringPolicy.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/KMeansClusteringPolicy.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/Model.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletCluster.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalModel.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/L1Model.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModel.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/SoftCluster.java
    
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayDirichlet.java
    
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java
    
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/AbstractCluster.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/AbstractCluster.java?rev=1095862&r1=1095861&r2=1095862&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/AbstractCluster.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/AbstractCluster.java
 Fri Apr 22 04:34:06 2011
@@ -34,26 +34,25 @@ import org.apache.mahout.math.VectorWrit
 import org.apache.mahout.math.function.SquareRootFunction;
 
 public abstract class AbstractCluster implements Cluster {
-
+  
   // cluster persistent state
   private int id;
-
+  
   private long numPoints;
-
+  
   private Vector center;
-
+  
   private Vector radius;
-
-  protected AbstractCluster() {
-  }
-
+  
+  protected AbstractCluster() {}
+  
   protected AbstractCluster(Vector point, int id2) {
     this.numPoints = 0;
     this.center = new RandomAccessSparseVector(point);
     this.radius = point.like();
     this.id = id2;
   }
-
+  
   protected AbstractCluster(Vector center2, Vector radius2, int id2) {
     this.numPoints = 0;
     this.center = new RandomAccessSparseVector(center2);
@@ -75,63 +74,67 @@ public abstract class AbstractCluster im
   public void createParameters(String prefix, Configuration jobConf) {
     // nothing to do
   }
-
+  
   /**
-   * @param id the id to set
+   * @param id
+   *          the id to set
    */
   protected void setId(int id) {
     this.id = id;
   }
-
+  
   /**
-   * @param l the numPoints to set
+   * @param l
+   *          the numPoints to set
    */
   protected void setNumPoints(long l) {
     this.numPoints = l;
   }
-
+  
   /**
-   * @param center the center to set
+   * @param center
+   *          the center to set
    */
   protected void setCenter(Vector center) {
     this.center = center;
   }
-
+  
   /**
-   * @param radius the radius to set
+   * @param radius
+   *          the radius to set
    */
   protected void setRadius(Vector radius) {
     this.radius = radius;
   }
-
+  
   // the observation statistics, initialized by the first observation
   private double s0;
-
+  
   private Vector s1;
-
+  
   private Vector s2;
-
+  
   /**
    * @return the s0
    */
   protected double getS0() {
     return s0;
   }
-
+  
   /**
    * @return the s1
    */
   protected Vector getS1() {
     return s1;
   }
-
+  
   /**
    * @return the s2
    */
   protected Vector getS2() {
     return s2;
   }
-
+  
   public void observe(ClusterObservations observations) {
     s0 += observations.getS0();
     if (s1 == null) {
@@ -145,41 +148,61 @@ public abstract class AbstractCluster im
       observations.getS2().addTo(s2);
     }
   }
-
+  
   @Override
   public void observe(VectorWritable x) {
     observe(x.get());
   }
-
+  
+  @Override
+  public void observe(VectorWritable x, double weight) {
+    observe(x.get(), weight);
+  }
+  
   public void observe(Vector x, double weight) {
-    s0 += weight;
-    Vector weightedX = x.times(weight);
+    if (weight == 1.0) {
+      observe(x);
+    } else {
+      s0 += weight;
+      Vector weightedX = x.times(weight);
+      if (s1 == null) {
+        s1 = weightedX;
+      } else {
+        weightedX.addTo(s1);
+      }
+      Vector x2 = x.times(x).times(weight);
+      if (s2 == null) {
+        s2 = x2;
+      } else {
+        x2.addTo(s2);
+      }
+    }
+  }
+  
+  public void observe(Vector x) {
+    s0 += 1;
     if (s1 == null) {
-      s1 = weightedX;
+      s1 = x.clone();
     } else {
-      weightedX.addTo(s1);
+      x.addTo(s1);
     }
-    Vector x2 = x.times(x).times(weight);
+    Vector x2 = x.times(x);
     if (s2 == null) {
       s2 = x2;
     } else {
       x2.addTo(s2);
     }
   }
-
-  public void observe(Vector x) {
-    observe(x, 1.0);
-  }
-
+  
   @Override
   public long getNumPoints() {
     return numPoints;
   }
-
+  
   public ClusterObservations getObservations() {
     return new ClusterObservations(s0, s1, s2);
   }
-
+  
   @Override
   public void computeParameters() {
     if (s0 == 0) {
@@ -189,13 +212,14 @@ public abstract class AbstractCluster im
     center = s1.divide(s0);
     // compute the component stds
     if (s0 > 1) {
-      radius = s2.times(s0).minus(s1.times(s1)).assign(new 
SquareRootFunction()).divide(s0);
-    } 
+      radius = s2.times(s0).minus(s1.times(s1))
+          .assign(new SquareRootFunction()).divide(s0);
+    }
     s0 = 0;
     s1 = null;
     s2 = null;
   }
-
+  
   @Override
   public void readFields(DataInput in) throws IOException {
     this.id = in.readInt();
@@ -206,7 +230,7 @@ public abstract class AbstractCluster im
     temp.readFields(in);
     this.radius = temp.get();
   }
-
+  
   @Override
   public void write(DataOutput out) throws IOException {
     out.writeInt(id);
@@ -214,7 +238,7 @@ public abstract class AbstractCluster im
     VectorWritable.writeVector(out, center);
     VectorWritable.writeVector(out, radius);
   }
-
+  
   @Override
   public String asFormatString(String[] bindings) {
     StringBuilder buf = new StringBuilder(50);
@@ -228,24 +252,24 @@ public abstract class AbstractCluster im
     buf.append('}');
     return buf.toString();
   }
-
+  
   public abstract String getIdentifier();
-
+  
   @Override
   public Vector getCenter() {
     return center;
   }
-
+  
   @Override
   public int getId() {
     return id;
   }
-
+  
   @Override
   public Vector getRadius() {
     return radius;
   }
-
+  
   /**
    * Compute the centroid by averaging the pointTotals
    * 
@@ -254,10 +278,10 @@ public abstract class AbstractCluster im
   public Vector computeCentroid() {
     return s0 == 0 ? getCenter() : s1.divide(s0);
   }
-
+  
   /**
-   * Return a human-readable formatted string representation of the vector, 
not intended to be complete nor
-   * usable as an input/output representation
+   * Return a human-readable formatted string representation of the vector, not
+   * intended to be complete nor usable as an input/output representation
    */
   public static String formatVector(Vector v, String[] bindings) {
     StringBuilder buf = new StringBuilder();
@@ -299,7 +323,7 @@ public abstract class AbstractCluster im
     buf.append(']');
     return buf.toString();
   }
-
+  
   @Override
   public long count() {
     return getNumPoints();

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterClassifier.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterClassifier.java?rev=1095862&r1=1095861&r2=1095862&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterClassifier.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterClassifier.java
 Fri Apr 22 04:34:06 2011
@@ -134,6 +134,20 @@ public class ClusterClassifier extends A
     getModels().get(actual).observe(new VectorWritable(instance));
   }
   
+  /**
+   * Train the models given an additional weight. Unique to ClusterClassifier
+   * 
+   * @param actual
+   *          the int index of a model
+   * @param data
+   *          a data Vector
+   * @param weight
+   *          a double weighting factor
+   */
+  public void train(int actual, Vector data, double weight) {
+    getModels().get(actual).observe(new VectorWritable(data), weight);
+  }
+  
   public void train(long trackingKey, String groupKey, int actual,
       Vector instance) {
     getModels().get(actual).observe(new VectorWritable(instance));

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterIterator.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterIterator.java?rev=1095862&r1=1095861&r2=1095862&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterIterator.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterIterator.java
 Fri Apr 22 04:34:06 2011
@@ -16,6 +16,7 @@
  */
 package org.apache.mahout.clustering;
 
+import java.util.Iterator;
 import java.util.List;
 
 import org.apache.mahout.math.Vector;
@@ -43,28 +44,32 @@ public class ClusterIterator {
    * 
    * @param data
    *          a List<Vector> of input vectors
-   * @param prior
-   *          the prior-trained ClusterClassifier
+   * @param classifier
+   *          a prior ClusterClassifier
    * @param numIterations
    *          the int number of iterations to perform
    * @return the posterior ClusterClassifier
    */
-  public ClusterClassifier iterate(List<Vector> data, ClusterClassifier prior,
-      int numIterations) {
+  public ClusterClassifier iterate(List<Vector> data,
+      ClusterClassifier classifier, int numIterations) {
     for (int iteration = 1; iteration <= numIterations; iteration++) {
       for (Vector vector : data) {
         // classification yields probabilities
-        Vector pdfs = prior.classify(vector);
-        // policy selects a model given those probabilities
-        int selected = policy.select(pdfs);
+        Vector probabilities = classifier.classify(vector);
+        // policy selects weights for models given those probabilities
+        Vector weights = policy.select(probabilities);
         // training causes all models to observe data
-        prior.train(selected, vector);
+        for (Iterator<Vector.Element> it = weights.iterateNonZero(); it
+            .hasNext();) {
+          int index = it.next().index();
+          classifier.train(index, vector, weights.get(index));
+        }
       }
       // compute the posterior models
-      prior.close();
+      classifier.close();
       // update the policy
-      policy.update(prior);
+      policy.update(classifier);
     }
-    return prior;
+    return classifier;
   }
 }

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusteringPolicy.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusteringPolicy.java?rev=1095862&r1=1095861&r2=1095862&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusteringPolicy.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusteringPolicy.java
 Fri Apr 22 04:34:06 2011
@@ -27,11 +27,11 @@ public interface ClusteringPolicy {
   /**
    * Return the index of the most appropriate model
    * 
-   * @param pdfs
+   * @param probabilities
    *          a Vector of pdfs
-   * @return an int index
+   * @return a Vector of weights
    */
-  public abstract int select(Vector pdfs);
+  public abstract Vector select(Vector probabilities);
   
   /**
    * Update the policy with the given classifier

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/DirichletClusteringPolicy.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/DirichletClusteringPolicy.java?rev=1095862&r1=1095861&r2=1095862&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/DirichletClusteringPolicy.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/DirichletClusteringPolicy.java
 Fri Apr 22 04:34:06 2011
@@ -18,6 +18,7 @@ package org.apache.mahout.clustering;
 
 import org.apache.mahout.clustering.dirichlet.UncommonDistributions;
 import org.apache.mahout.math.DenseVector;
+import org.apache.mahout.math.SequentialAccessSparseVector;
 import org.apache.mahout.math.Vector;
 
 public class DirichletClusteringPolicy implements ClusteringPolicy {
@@ -39,8 +40,11 @@ public class DirichletClusteringPolicy i
   // Total observed over all time
   private Vector totalCounts;
   
-  public int select(Vector pdfs) {
-    return UncommonDistributions.rMultinom(pdfs.times(mixture));
+  public Vector select(Vector probabilities) {
+    int rMultinom = 
UncommonDistributions.rMultinom(probabilities.times(mixture));
+    Vector weights = new SequentialAccessSparseVector(probabilities.size());
+    weights.set(rMultinom, 1.0);
+    return weights;
   }
   
   // update the total counts and then the mixture

Added: 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/FuzzyKMeansClusteringPolicy.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/FuzzyKMeansClusteringPolicy.java?rev=1095862&view=auto
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/FuzzyKMeansClusteringPolicy.java
 (added)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/FuzzyKMeansClusteringPolicy.java
 Fri Apr 22 04:34:06 2011
@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.mahout.clustering;
+
+import org.apache.mahout.math.Vector;
+
+/**
+ * This is a probability-weighted clustering policy, suitable for fuzzy k-means
+ * clustering
+ * 
+ */
+public class FuzzyKMeansClusteringPolicy implements ClusteringPolicy {
+    
+  /*
+   * (non-Javadoc)
+   * 
+   * @see
+   * org.apache.mahout.clustering.ClusteringPolicy#update(org.apache.mahout.
+   * clustering.ClusterClassifier)
+   */
+  @Override
+  public void update(ClusterClassifier posterior) {
+    // nothing to do here
+  }
+
+  /* (non-Javadoc)
+   * @see 
org.apache.mahout.clustering.ClusteringPolicy#select(org.apache.mahout.math.Vector)
+   */
+  @Override
+  public Vector select(Vector probabilities) {
+    return probabilities;
+  }
+  
+}

Propchange: 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/FuzzyKMeansClusteringPolicy.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/KMeansClusteringPolicy.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/KMeansClusteringPolicy.java?rev=1095862&r1=1095861&r2=1095862&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/KMeansClusteringPolicy.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/KMeansClusteringPolicy.java
 Fri Apr 22 04:34:06 2011
@@ -16,6 +16,7 @@
  */
 package org.apache.mahout.clustering;
 
+import org.apache.mahout.math.SequentialAccessSparseVector;
 import org.apache.mahout.math.Vector;
 
 /**
@@ -29,8 +30,11 @@ public class KMeansClusteringPolicy impl
    * @see 
org.apache.mahout.clustering.ClusteringPolicy#select(org.apache.mahout.math.Vector)
    */
   @Override
-  public int select(Vector pdfs) {
-    return pdfs.maxValueIndex();
+  public Vector select(Vector probabilities) {
+    int maxValueIndex = probabilities.maxValueIndex();
+    Vector weights = new SequentialAccessSparseVector(probabilities.size());
+    weights.set(maxValueIndex, 1.0);
+    return weights;
   }
   
   /* (non-Javadoc)

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/Model.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/Model.java?rev=1095862&r1=1095861&r2=1095862&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/Model.java 
(original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/Model.java Fri 
Apr 22 04:34:06 2011
@@ -21,8 +21,9 @@ import org.apache.hadoop.io.Writable;
 import org.apache.mahout.math.VectorWritable;
 
 /**
- * A model is a probability distribution over observed data points and allows 
the probability of any data
- * point to be computed. All Models have a persistent representation and extend
+ * A model is a probability distribution over observed data points and allows
+ * the probability of any data point to be computed. All Models have a
+ * persistent representation and extend
  * WritablesampleFromPosterior(Model<VectorWritable>[])
  */
 public interface Model<O> extends Writable {
@@ -35,7 +36,7 @@ public interface Model<O> extends Writab
    * @return the probability that x is in the receiver
    */
   double pdf(O x);
-
+  
   /**
    * Observe the given observation, retaining information about it
    * 
@@ -45,8 +46,18 @@ public interface Model<O> extends Writab
   void observe(O x);
   
   /**
-   * Compute a new set of posterior parameters based upon the Observations 
that have been observed since my
-   * creation
+   * Observe the given observation, retaining information about it
+   * 
+   * @param x
+   *          an Observation from the posterior
+   * @param weight
+   *          a double weighting factor
+   */
+  void observe(O x, double weight);
+
+  /**
+   * Compute a new set of posterior parameters based upon the Observations that
+   * have been observed since my creation
    */
   void computeParameters();
   
@@ -56,10 +67,10 @@ public interface Model<O> extends Writab
    * @return an int
    */
   long count();
-
+  
   /**
    * @return a sample of my posterior model
    */
   Model<VectorWritable> sampleFromPosterior();
-
+  
 }

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletCluster.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletCluster.java?rev=1095862&r1=1095861&r2=1095862&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletCluster.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletCluster.java
 Fri Apr 22 04:34:06 2011
@@ -163,4 +163,9 @@ public class DirichletCluster implements
     return model.sampleFromPosterior();
   }
 
+  @Override
+  public void observe(VectorWritable x, double weight) {
+   throw new UnsupportedOperationException();
+  }
+
 }

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalModel.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalModel.java?rev=1095862&r1=1095861&r2=1095862&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalModel.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalModel.java
 Fri Apr 22 04:34:06 2011
@@ -209,4 +209,9 @@ public class AsymmetricSampledNormalMode
   public Vector getRadius() {
     return getStdDev();
   }
+
+  @Override
+  public void observe(VectorWritable x, double weight) {
+   throw new UnsupportedOperationException();
+  }
 }

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/L1Model.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/L1Model.java?rev=1095862&r1=1095861&r2=1095862&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/L1Model.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/L1Model.java
 Fri Apr 22 04:34:06 2011
@@ -150,4 +150,9 @@ public class L1Model implements Cluster 
     return null;
   }
 
+  @Override
+  public void observe(VectorWritable x, double weight) {
+   throw new UnsupportedOperationException();
+  }
+
 }

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModel.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModel.java?rev=1095862&r1=1095861&r2=1095862&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModel.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModel.java
 Fri Apr 22 04:34:06 2011
@@ -209,4 +209,9 @@ public class NormalModel implements Clus
   public Vector getRadius() {
     return mean.like().assign(getStdDev());
   }
+
+  @Override
+  public void observe(VectorWritable x, double weight) {
+   throw new UnsupportedOperationException();
+  }
 }

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/SoftCluster.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/SoftCluster.java?rev=1095862&r1=1095861&r2=1095862&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/SoftCluster.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/SoftCluster.java
 Fri Apr 22 04:34:06 2011
@@ -17,41 +17,44 @@
 
 package org.apache.mahout.clustering.fuzzykmeans;
 
-import org.apache.commons.lang.NotImplementedException;
 import org.apache.mahout.clustering.kmeans.Cluster;
 import org.apache.mahout.common.distance.DistanceMeasure;
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorWritable;
 
 public class SoftCluster extends Cluster {
-
+  
   // For Writable
-  public SoftCluster() {
-  }
-
+  public SoftCluster() {}
+  
   /**
    * Construct a new SoftCluster with the given point as its center
-   *
-   * @param center the center point
-   * @param measure the DistanceMeasure
+   * 
+   * @param center
+   *          the center point
+   * @param measure
+   *          the DistanceMeasure
    */
   public SoftCluster(Vector center, int clusterId, DistanceMeasure measure) {
     super(center, clusterId, measure);
   }
-
+  
   @Override
   public String asFormatString() {
-    return this.getIdentifier() + ": " + 
this.computeCentroid().asFormatString();
+    return this.getIdentifier() + ": "
+        + this.computeCentroid().asFormatString();
   }
-
+  
   @Override
   public String getIdentifier() {
     return (isConverged() ? "SV-" : "SC-") + getId();
   }
-
+  
   @Override
   public double pdf(VectorWritable vw) {
-    // SoftCluster pdf cannot be calculated out of context. See 
FuzzyKMeansClusterer
-    throw new NotImplementedException();
+    // SoftCluster pdf cannot be calculated out of context. See
+    // FuzzyKMeansClusterer
+    throw new UnsupportedOperationException(
+        "SoftCluster pdf cannot be calculated out of context. See 
FuzzyKMeansClusterer");
   }
 }

Modified: 
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayDirichlet.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayDirichlet.java?rev=1095862&r1=1095861&r2=1095862&view=diff
==============================================================================
--- 
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayDirichlet.java
 (original)
+++ 
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayDirichlet.java
 Fri Apr 22 04:34:06 2011
@@ -123,8 +123,8 @@ public class DisplayDirichlet extends Di
     
     RandomUtils.useTestSeed();
     generateSamples();
-    int numIterations = 40;
-    int numClusters = 40;
+    int numIterations = 20;
+    int numClusters = 10;
     int alpha0 = 1;
     int thin = 3;
     int burnin = 5;

Modified: 
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java?rev=1095862&r1=1095861&r2=1095862&view=diff
==============================================================================
--- 
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java
 (original)
+++ 
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java
 Fri Apr 22 04:34:06 2011
@@ -19,33 +19,44 @@ package org.apache.mahout.clustering.dis
 
 import java.awt.Graphics;
 import java.awt.Graphics2D;
+import java.util.ArrayList;
+import java.util.List;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
+import org.apache.mahout.clustering.Cluster;
+import org.apache.mahout.clustering.ClusterClassifier;
+import org.apache.mahout.clustering.ClusterIterator;
+import org.apache.mahout.clustering.ClusteringPolicy;
+import org.apache.mahout.clustering.FuzzyKMeansClusteringPolicy;
 import org.apache.mahout.clustering.fuzzykmeans.FuzzyKMeansDriver;
+import org.apache.mahout.clustering.fuzzykmeans.SoftCluster;
 import org.apache.mahout.clustering.kmeans.RandomSeedGenerator;
 import org.apache.mahout.common.HadoopUtil;
 import org.apache.mahout.common.RandomUtils;
 import org.apache.mahout.common.distance.DistanceMeasure;
 import org.apache.mahout.common.distance.ManhattanDistanceMeasure;
+import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.VectorWritable;
 
 class DisplayFuzzyKMeans extends DisplayClustering {
-
+  
   DisplayFuzzyKMeans() {
     initialize();
-    this.setTitle("Fuzzy k-Means Clusters (>" + (int) (significance * 100) + 
"% of population)");
+    this.setTitle("Fuzzy k-Means Clusters (>" + (int) (significance * 100)
+        + "% of population)");
   }
-
+  
   // Override the paint() method
   @Override
   public void paint(Graphics g) {
     plotSampleData((Graphics2D) g);
     plotClusters((Graphics2D) g);
   }
-
+  
   public static void main(String[] args) throws Exception {
     DistanceMeasure measure = new ManhattanDistanceMeasure();
-
+    
     Path samples = new Path("samples");
     Path output = new Path("output");
     Configuration conf = new Configuration();
@@ -53,58 +64,40 @@ class DisplayFuzzyKMeans extends Display
     HadoopUtil.delete(conf, output);
     RandomUtils.useTestSeed();
     DisplayClustering.generateSamples();
-    //boolean b = true;
-    //if (b) {
-    writeSampleData(samples);
-    Path clusters = RandomSeedGenerator.buildRandom(conf, samples, new 
Path(output, "clusters-0"), 3, measure);
-    double threshold = 0.001;
-    int numIterations = 10;
-    int m = 3;
-    FuzzyKMeansDriver.run(samples,
-                            clusters,
-                            output,
-                            measure,
-                            threshold,
-                            numIterations,
-                            m,
-                            true,
-                            true,
-                            threshold,
-                            true);
-
-    loadClusters(output);
-    //} else {
-    //  List<Vector> points = new ArrayList<Vector>();
-    //  for (VectorWritable sample : SAMPLE_DATA) {
-    //    points.add((Vector) sample.get());
-    //  }
-    //  int id = 0;
-    //  List<SoftCluster> initialClusters = new ArrayList<SoftCluster>();
-    //  int numClusters = 3;
-    //  for (Vector point : points) {
-    //    if (initialClusters.size() < Math.min(numClusters, points.size())) {
-    //      initialClusters.add(new SoftCluster(point, id++));
-    //    } else {
-    //     break;
-    //    }
-    //  }
-    //  List<List<SoftCluster>> results = 
FuzzyKMeansClusterer.clusterPoints(points,
-    //                                                                       
initialClusters,
-    //                                                                       
measure,
-    //                                                                       
threshold,
-    //                                                                       m,
-    //                                                                       
numIterations);
-    //  for (List<SoftCluster> models : results) {
-    //    List<org.apache.mahout.clustering.Cluster> clusters = new 
ArrayList<org.apache.mahout.clustering.Cluster>();
-    //    for (SoftCluster cluster : models) {
-    //      org.apache.mahout.clustering.Cluster cluster2 = 
(org.apache.mahout.clustering.Cluster) cluster;
-    //      if (isSignificant(cluster2)) {
-    //        clusters.add(cluster2);
-    //      }
-    //    }
-    //    CLUSTERS.add(clusters);
-    //  }
-    //}
+    boolean b = false;
+    if (b) {
+      writeSampleData(samples);
+      Path clusters = RandomSeedGenerator.buildRandom(conf, samples, new Path(
+          output, "clusters-0"), 3, measure);
+      double threshold = 0.001;
+      int numIterations = 10;
+      int m = 3;
+      FuzzyKMeansDriver.run(samples, clusters, output, measure, threshold,
+          numIterations, m, true, true, threshold, true);
+      
+      loadClusters(output);
+    } else {
+      List<Vector> points = new ArrayList<Vector>();
+      for (VectorWritable sample : SAMPLE_DATA) {
+        points.add(sample.get());
+      }
+      List<Cluster> initialClusters = new ArrayList<Cluster>();
+      int id = 0;
+      int numClusters = 4;
+      for (Vector point : points) {
+        if (initialClusters.size() < Math.min(numClusters, points.size())) {
+          initialClusters.add(new SoftCluster(point, id++, measure));
+        } else {
+          break;
+        }
+      }
+      
+      ClusterClassifier prior = new ClusterClassifier(initialClusters);
+      ClusteringPolicy policy = new FuzzyKMeansClusteringPolicy();
+      ClusterClassifier posterior = new ClusterIterator(policy).iterate(points,
+          prior, 10);
+      CLUSTERS.add(posterior.getModels());
+    }
     new DisplayFuzzyKMeans();
   }
 }

Modified: 
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java?rev=1095862&r1=1095861&r2=1095862&view=diff
==============================================================================
--- 
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java
 (original)
+++ 
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java
 Fri Apr 22 04:34:06 2011
@@ -75,7 +75,7 @@ class DisplayKMeans extends DisplayClust
       }
       List<Cluster> initialClusters = new ArrayList<Cluster>();
       int id = 0;
-      int numClusters = 3;
+      int numClusters = 4;
       for (Vector point : points) {
         if (initialClusters.size() < Math.min(numClusters, points.size())) {
           initialClusters.add(new org.apache.mahout.clustering.kmeans.Cluster(


Reply via email to