Github user smurching commented on a diff in the pull request:
https://github.com/apache/spark/pull/19186#discussion_r138139729
--- Diff: mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala
---
@@ -300,20 +300,23 @@ class KMeans @Since("1.5.0") (
@Since("1.5.0")
def setSeed(value: Long): this.type = set(seed, value)
+ /** @group setParam */
+ @Since("2.3.0")
+ def setHandlePersistence(value: Boolean): this.type =
set(handlePersistence, value)
+
@Since("2.0.0")
override def fit(dataset: Dataset[_]): KMeansModel = {
transformSchema(dataset.schema, logging = true)
- val handlePersistence = dataset.rdd.getStorageLevel ==
StorageLevel.NONE
val instances: RDD[OldVector] =
dataset.select(col($(featuresCol))).rdd.map {
case Row(point: Vector) => OldVectors.fromML(point)
}
- if (handlePersistence) {
+ if ($(handlePersistence)) {
--- End diff --
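See the comment above: in addition to `$(handlePersistence)`, we should also
check that `dataset.storageLevel == StorageLevel.NONE`, so that a dataset the
caller has already persisted is not cached a second time.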
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]