Repository: spark Updated Branches: refs/heads/master fc65b4af0 -> fc10c898f
[SPARK-25758][ML] Deprecate computeCost in BisectingKMeans ## What changes were proposed in this pull request? The PR proposes to deprecate the `computeCost` method on `BisectingKMeans` in favor of the adoption of `ClusteringEvaluator` in order to evaluate the clustering. ## How was this patch tested? NA Closes #22869 from mgaido91/SPARK-25758_3.0. Authored-by: Marco Gaido <marcogaid...@gmail.com> Signed-off-by: DB Tsai <d_t...@apple.com> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fc10c898 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fc10c898 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fc10c898 Branch: refs/heads/master Commit: fc10c898f45a25cf3751f0cd042e4c0743f1adba Parents: fc65b4a Author: Marco Gaido <marcogaid...@gmail.com> Authored: Mon Nov 5 22:13:20 2018 +0000 Committer: DB Tsai <d_t...@apple.com> Committed: Mon Nov 5 22:13:20 2018 +0000 ---------------------------------------------------------------------- .../org/apache/spark/ml/clustering/BisectingKMeans.scala | 7 +++++++ python/pyspark/ml/clustering.py | 7 +++++++ 2 files changed, 14 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/fc10c898/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala index 5cb16cc..1a94aef 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala @@ -125,8 +125,15 @@ class BisectingKMeansModel private[ml] ( /** * Computes the sum of squared distances between the input points and their corresponding cluster * centers. 
+ * + * @deprecated This method is deprecated and will be removed in future versions. Use + * ClusteringEvaluator instead. You can also get the cost on the training dataset in + * the summary. */ @Since("2.0.0") + @deprecated("This method is deprecated and will be removed in future versions. Use " + + "ClusteringEvaluator instead. You can also get the cost on the training dataset in the " + + "summary.", "3.0.0") def computeCost(dataset: Dataset[_]): Double = { SchemaUtils.validateVectorCompatibleColumn(dataset.schema, getFeaturesCol) val data = DatasetUtils.columnToOldVector(dataset, getFeaturesCol) http://git-wip-us.apache.org/repos/asf/spark/blob/fc10c898/python/pyspark/ml/clustering.py ---------------------------------------------------------------------- diff --git a/python/pyspark/ml/clustering.py b/python/pyspark/ml/clustering.py index 5ef4e76..b371294 100644 --- a/python/pyspark/ml/clustering.py +++ b/python/pyspark/ml/clustering.py @@ -540,7 +540,14 @@ class BisectingKMeansModel(JavaModel, JavaMLWritable, JavaMLReadable): """ Computes the sum of squared distances between the input points and their corresponding cluster centers. + + .. note:: Deprecated in 3.0.0. It will be removed in future versions. Use + ClusteringEvaluator instead. You can also get the cost on the training dataset in the + summary. """ + warnings.warn("Deprecated in 3.0.0. It will be removed in future versions. Use " + "ClusteringEvaluator instead. You can also get the cost on the training " + "dataset in the summary.", DeprecationWarning) return self._call_java("computeCost", dataset) @property --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org