Author: srowen
Date: Mon Nov 28 18:54:29 2011
New Revision: 1207508

URL: http://svn.apache.org/viewvc?rev=1207508&view=rev
Log:
MAHOUT-900 fix sampling logic and handle case of < k elements

Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/RandomSeedGenerator.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/RandomSeedGenerator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/RandomSeedGenerator.java?rev=1207508&r1=1207507&r2=1207508&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/RandomSeedGenerator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/RandomSeedGenerator.java Mon Nov 28 18:54:29 2011
@@ -95,7 +95,7 @@ public final class RandomSeedGenerator {
           if (currentSize < k) {
             chosenTexts.add(newText);
             chosenClusters.add(newCluster);
-          } else if (random.nextInt(currentSize + 1) == 0) { // with chance 1/(currentSize+1) pick new element
+          } else if (random.nextInt(currentSize + 1) != 0) { // with chance 1/(currentSize+1) pick new element
             int indexToRemove = random.nextInt(currentSize); // evict one chosen randomly
             chosenTexts.remove(indexToRemove);
             chosenClusters.remove(indexToRemove);
@@ -106,7 +106,7 @@ public final class RandomSeedGenerator {
       }
 
       try {
-        for (int i = 0; i < k; i++) {
+        for (int i = 0; i < chosenTexts.size(); i++) {
           writer.append(chosenTexts.get(i), chosenClusters.get(i));
         }
         log.info("Wrote {} vectors to {}", k, outFile);


Reply via email to