Repository: spark Updated Branches: refs/heads/master b3aedca6b -> 33ae7a35d
[SPARK-11358][MLLIB] deprecate runs in k-means This PR deprecates `runs` in k-means. `runs` introduces extra complexity and overhead in MLlib's k-means implementation. I haven't seen much usage with `runs` not equal to `1`. We don't have a unit test for it either. We can deprecate this method in 1.6, and void it in 1.7. It helps us simplify the implementation. cc: srowen Author: Xiangrui Meng <[email protected]> Closes #9322 from mengxr/SPARK-11358. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/33ae7a35 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/33ae7a35 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/33ae7a35 Branch: refs/heads/master Commit: 33ae7a35daa86c34f1f9f72f997e0c2d4cd8abec Parents: b3aedca Author: Xiangrui Meng <[email protected]> Authored: Mon Nov 2 13:42:16 2015 -0800 Committer: Xiangrui Meng <[email protected]> Committed: Mon Nov 2 13:42:16 2015 -0800 ---------------------------------------------------------------------- .../main/scala/org/apache/spark/mllib/clustering/KMeans.scala | 4 ++-- python/pyspark/mllib/clustering.py | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/33ae7a35/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala index 7168aac..2895db7 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala @@ -107,7 +107,7 @@ class KMeans private ( * Number of runs of the algorithm to execute in parallel. */ @Since("1.4.0") - @Experimental + @deprecated("Support for runs is deprecated. This param will have no effect in 1.7.0.", "1.6.0") def getRuns: Int = runs /** @@ -117,7 +117,7 @@ class KMeans private ( * return the best clustering found over any run. Default: 1. */ @Since("0.8.0") - @Experimental + @deprecated("Support for runs is deprecated. This param will have no effect in 1.7.0.", "1.6.0") def setRuns(runs: Int): this.type = { if (runs <= 0) { throw new IllegalArgumentException("Number of runs must be positive") http://git-wip-us.apache.org/repos/asf/spark/blob/33ae7a35/python/pyspark/mllib/clustering.py ---------------------------------------------------------------------- diff --git a/python/pyspark/mllib/clustering.py b/python/pyspark/mllib/clustering.py index d1c3755..8629aa5 100644 --- a/python/pyspark/mllib/clustering.py +++ b/python/pyspark/mllib/clustering.py @@ -17,6 +17,7 @@ import sys import array as pyarray +import warnings if sys.version > '3': xrange = range @@ -170,6 +171,9 @@ class KMeans(object): def train(cls, rdd, k, maxIterations=100, runs=1, initializationMode="k-means||", seed=None, initializationSteps=5, epsilon=1e-4, initialModel=None): """Train a k-means clustering model.""" + if runs != 1: + warnings.warn( + "Support for runs is deprecated in 1.6.0. This param will have no effect in 1.7.0.") clusterInitialModel = [] if initialModel is not None: if not isinstance(initialModel, KMeansModel): --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
