Author: srowen
Date: Mon Nov 28 18:54:29 2011
New Revision: 1207508
URL: http://svn.apache.org/viewvc?rev=1207508&view=rev
Log:
MAHOUT-900 fix sampling logic and handle case of < k elements
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/RandomSeedGenerator.java
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/RandomSeedGenerator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/RandomSeedGenerator.java?rev=1207508&r1=1207507&r2=1207508&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/RandomSeedGenerator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/RandomSeedGenerator.java Mon Nov 28 18:54:29 2011
@@ -95,7 +95,7 @@ public final class RandomSeedGenerator {
if (currentSize < k) {
chosenTexts.add(newText);
chosenClusters.add(newCluster);
- } else if (random.nextInt(currentSize + 1) == 0) { // with chance 1/(currentSize+1) pick new element
+ } else if (random.nextInt(currentSize + 1) != 0) { // with chance 1/(currentSize+1) pick new element
int indexToRemove = random.nextInt(currentSize); // evict one chosen randomly
chosenTexts.remove(indexToRemove);
chosenClusters.remove(indexToRemove);
@@ -106,7 +106,7 @@ public final class RandomSeedGenerator {
}
try {
- for (int i = 0; i < k; i++) {
+ for (int i = 0; i < chosenTexts.size(); i++) {
writer.append(chosenTexts.get(i), chosenClusters.get(i));
}
log.info("Wrote {} vectors to {}", k, outFile);