Github user mgaido91 commented on a diff in the pull request:
https://github.com/apache/spark/pull/21561#discussion_r209860695
--- Diff: mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala ---
@@ -171,6 +169,8 @@ class BisectingKMeans private (
val vectors = input.zip(norms).map { case (x, norm) => new VectorWithNorm(x, norm) }
var assignments = vectors.map(v => (ROOT_INDEX, v))
var activeClusters = summarize(d, assignments, dMeasure)
+ val numSamples = activeClusters.values.map(_.size).sum
--- End diff --
ditto
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]