Repository: spark Updated Branches: refs/heads/master 15524c41b -> c2962546d
[SPARK-25758][ML] Deprecate computeCost on BisectingKMeans ## What changes were proposed in this pull request? The PR proposes to deprecate the `computeCost` method on `BisectingKMeans` in favor of the adoption of `ClusteringEvaluator` in order to evaluate the clustering. ## How was this patch tested? NA Closes #22756 from mgaido91/SPARK-25758. Authored-by: Marco Gaido <[email protected]> Signed-off-by: Dongjoon Hyun <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c2962546 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c2962546 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c2962546 Branch: refs/heads/master Commit: c2962546d9a5900a5628a31b83d2c4b22c3a7936 Parents: 15524c4 Author: Marco Gaido <[email protected]> Authored: Thu Oct 18 10:32:25 2018 -0700 Committer: Dongjoon Hyun <[email protected]> Committed: Thu Oct 18 10:32:25 2018 -0700 ---------------------------------------------------------------------- .../scala/org/apache/spark/ml/clustering/BisectingKMeans.scala | 5 +++++ python/pyspark/ml/clustering.py | 6 ++++++ 2 files changed, 11 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/c2962546/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala index 5cb16cc..2243d99 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala @@ -125,8 +125,13 @@ class BisectingKMeansModel private[ml] ( /** * Computes the sum of squared distances between the input points and their corresponding cluster * centers. + * + * @deprecated This method is deprecated and will be removed in 3.0.0. Use ClusteringEvaluator + * instead. You can also get the cost on the training dataset in the summary. */ @Since("2.0.0") + @deprecated("This method is deprecated and will be removed in 3.0.0. Use ClusteringEvaluator " + + "instead. You can also get the cost on the training dataset in the summary.", "2.4.0") def computeCost(dataset: Dataset[_]): Double = { SchemaUtils.validateVectorCompatibleColumn(dataset.schema, getFeaturesCol) val data = DatasetUtils.columnToOldVector(dataset, getFeaturesCol) http://git-wip-us.apache.org/repos/asf/spark/blob/c2962546/python/pyspark/ml/clustering.py ---------------------------------------------------------------------- diff --git a/python/pyspark/ml/clustering.py b/python/pyspark/ml/clustering.py index 5ef4e76..11eb124 100644 --- a/python/pyspark/ml/clustering.py +++ b/python/pyspark/ml/clustering.py @@ -540,7 +540,13 @@ class BisectingKMeansModel(JavaModel, JavaMLWritable, JavaMLReadable): """ Computes the sum of squared distances between the input points and their corresponding cluster centers. + + ..note:: Deprecated in 2.4.0. It will be removed in 3.0.0. Use ClusteringEvaluator instead. + You can also get the cost on the training dataset in the summary. """ + warnings.warn("Deprecated in 2.4.0. It will be removed in 3.0.0. Use ClusteringEvaluator " + "instead. You can also get the cost on the training dataset in the summary.", + DeprecationWarning) return self._call_java("computeCost", dataset) @property --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
