Repository: spark Updated Branches: refs/heads/branch-2.4 ac9a6f08a -> 71a6a9ce8
[SPARK-25758][ML] Deprecate computeCost on BisectingKMeans ## What changes were proposed in this pull request? The PR proposes to deprecate the `computeCost` method on `BisectingKMeans` in favor of the adoption of `ClusteringEvaluator` in order to evaluate the clustering. ## How was this patch tested? NA Closes #22756 from mgaido91/SPARK-25758. Authored-by: Marco Gaido <[email protected]> Signed-off-by: Dongjoon Hyun <[email protected]> (cherry picked from commit c2962546d9a5900a5628a31b83d2c4b22c3a7936) Signed-off-by: Dongjoon Hyun <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/71a6a9ce Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/71a6a9ce Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/71a6a9ce Branch: refs/heads/branch-2.4 Commit: 71a6a9ce8558913bc410918c14b6799be9baaeb3 Parents: ac9a6f0 Author: Marco Gaido <[email protected]> Authored: Thu Oct 18 10:32:25 2018 -0700 Committer: Dongjoon Hyun <[email protected]> Committed: Thu Oct 18 10:32:37 2018 -0700 ---------------------------------------------------------------------- .../scala/org/apache/spark/ml/clustering/BisectingKMeans.scala | 5 +++++ python/pyspark/ml/clustering.py | 6 ++++++ 2 files changed, 11 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/71a6a9ce/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala index 5cb16cc..2243d99 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala @@ -125,8 +125,13 @@ class BisectingKMeansModel private[ml] ( /** * 
Computes the sum of squared distances between the input points and their corresponding cluster * centers. + * + * @deprecated This method is deprecated and will be removed in 3.0.0. Use ClusteringEvaluator + * instead. You can also get the cost on the training dataset in the summary. */ @Since("2.0.0") + @deprecated("This method is deprecated and will be removed in 3.0.0. Use ClusteringEvaluator " + + "instead. You can also get the cost on the training dataset in the summary.", "2.4.0") def computeCost(dataset: Dataset[_]): Double = { SchemaUtils.validateVectorCompatibleColumn(dataset.schema, getFeaturesCol) val data = DatasetUtils.columnToOldVector(dataset, getFeaturesCol) http://git-wip-us.apache.org/repos/asf/spark/blob/71a6a9ce/python/pyspark/ml/clustering.py ---------------------------------------------------------------------- diff --git a/python/pyspark/ml/clustering.py b/python/pyspark/ml/clustering.py index 5ef4e76..11eb124 100644 --- a/python/pyspark/ml/clustering.py +++ b/python/pyspark/ml/clustering.py @@ -540,7 +540,13 @@ class BisectingKMeansModel(JavaModel, JavaMLWritable, JavaMLReadable): """ Computes the sum of squared distances between the input points and their corresponding cluster centers. + + .. note:: Deprecated in 2.4.0. It will be removed in 3.0.0. Use ClusteringEvaluator instead. + You can also get the cost on the training dataset in the summary. """ + warnings.warn("Deprecated in 2.4.0. It will be removed in 3.0.0. Use ClusteringEvaluator " + "instead. You can also get the cost on the training dataset in the summary.", + DeprecationWarning) return self._call_java("computeCost", dataset) @property --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
