GitHub user zhengruifeng commented on a diff in the pull request: https://github.com/apache/spark/pull/20518#discussion_r166813909 --- Diff: mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala --- @@ -745,4 +763,27 @@ private[spark] class CosineDistanceMeasure extends DistanceMeasure { override def distance(v1: VectorWithNorm, v2: VectorWithNorm): Double = { 1 - dot(v1.vector, v2.vector) / v1.norm / v2.norm } + + /** + * Updates the value of `sum` adding the `point` vector. + * @param point a `VectorWithNorm` to be added to `sum` of a cluster + * @param sum the `sum` for a cluster to be updated + */ + override def updateClusterSum(point: VectorWithNorm, sum: Vector): Unit = { + axpy(1.0 / point.norm, point.vector, sum) --- End diff -- Do we need to ignore zero points here? For a zero vector `point.norm` is 0, so the scale factor `1.0 / point.norm` is not finite.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org