Repository: spark Updated Branches: refs/heads/branch-2.4 fd5b24726 -> 36307b1e4
[SPARK-25764][ML][EXAMPLES] Update BisectingKMeans example to use ClusteringEvaluator ## What changes were proposed in this pull request? The PR updates the examples for `BisectingKMeans` so that they don't use the deprecated method `computeCost` (see SPARK-25758). ## How was this patch tested? running examples Closes #22763 from mgaido91/SPARK-25764. Authored-by: Marco Gaido <marcogaid...@gmail.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> (cherry picked from commit d0ecff28545ac81f5ba7ac06957ced65b6e3ebcd) Signed-off-by: Wenchen Fan <wenc...@databricks.com> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/36307b1e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/36307b1e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/36307b1e Branch: refs/heads/branch-2.4 Commit: 36307b1e4b42ce22b07e7a3fc2679c4b5e7c34c8 Parents: fd5b247 Author: Marco Gaido <marcogaid...@gmail.com> Authored: Fri Oct 19 09:33:46 2018 +0800 Committer: Wenchen Fan <wenc...@databricks.com> Committed: Fri Oct 19 09:34:25 2018 +0800 ---------------------------------------------------------------------- .../spark/examples/ml/JavaBisectingKMeansExample.java | 12 +++++++++--- .../src/main/python/ml/bisecting_k_means_example.py | 12 +++++++++--- .../spark/examples/ml/BisectingKMeansExample.scala | 12 +++++++++--- 3 files changed, 27 insertions(+), 9 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/36307b1e/examples/src/main/java/org/apache/spark/examples/ml/JavaBisectingKMeansExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaBisectingKMeansExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaBisectingKMeansExample.java index 8c82aaa..f517dc3 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaBisectingKMeansExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaBisectingKMeansExample.java @@ -20,6 +20,7 @@ package org.apache.spark.examples.ml; // $example on$ import org.apache.spark.ml.clustering.BisectingKMeans; import org.apache.spark.ml.clustering.BisectingKMeansModel; +import org.apache.spark.ml.evaluation.ClusteringEvaluator; import org.apache.spark.ml.linalg.Vector; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; @@ -50,9 +51,14 @@ public class JavaBisectingKMeansExample { BisectingKMeans bkm = new BisectingKMeans().setK(2).setSeed(1); BisectingKMeansModel model = bkm.fit(dataset); - // Evaluate clustering. - double cost = model.computeCost(dataset); - System.out.println("Within Set Sum of Squared Errors = " + cost); + // Make predictions + Dataset<Row> predictions = model.transform(dataset); + + // Evaluate clustering by computing Silhouette score + ClusteringEvaluator evaluator = new ClusteringEvaluator(); + + double silhouette = evaluator.evaluate(predictions); + System.out.println("Silhouette with squared euclidean distance = " + silhouette); // Shows the result. System.out.println("Cluster Centers: "); http://git-wip-us.apache.org/repos/asf/spark/blob/36307b1e/examples/src/main/python/ml/bisecting_k_means_example.py ---------------------------------------------------------------------- diff --git a/examples/src/main/python/ml/bisecting_k_means_example.py b/examples/src/main/python/ml/bisecting_k_means_example.py index 7842d20..82adb33 100644 --- a/examples/src/main/python/ml/bisecting_k_means_example.py +++ b/examples/src/main/python/ml/bisecting_k_means_example.py @@ -24,6 +24,7 @@ from __future__ import print_function # $example on$ from pyspark.ml.clustering import BisectingKMeans +from pyspark.ml.evaluation import ClusteringEvaluator # $example off$ from pyspark.sql import SparkSession @@ -41,9 +42,14 @@ if __name__ == "__main__": bkm = BisectingKMeans().setK(2).setSeed(1) model = bkm.fit(dataset) - # Evaluate clustering. - cost = model.computeCost(dataset) - print("Within Set Sum of Squared Errors = " + str(cost)) + # Make predictions + predictions = model.transform(dataset) + + # Evaluate clustering by computing Silhouette score + evaluator = ClusteringEvaluator() + + silhouette = evaluator.evaluate(predictions) + print("Silhouette with squared euclidean distance = " + str(silhouette)) # Shows the result. print("Cluster Centers: ") http://git-wip-us.apache.org/repos/asf/spark/blob/36307b1e/examples/src/main/scala/org/apache/spark/examples/ml/BisectingKMeansExample.scala ---------------------------------------------------------------------- diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/BisectingKMeansExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/BisectingKMeansExample.scala index 5f8f2c9..14e13df 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/BisectingKMeansExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/BisectingKMeansExample.scala @@ -21,6 +21,7 @@ package org.apache.spark.examples.ml // $example on$ import org.apache.spark.ml.clustering.BisectingKMeans +import org.apache.spark.ml.evaluation.ClusteringEvaluator // $example off$ import org.apache.spark.sql.SparkSession @@ -48,9 +49,14 @@ object BisectingKMeansExample { val bkm = new BisectingKMeans().setK(2).setSeed(1) val model = bkm.fit(dataset) - // Evaluate clustering. - val cost = model.computeCost(dataset) - println(s"Within Set Sum of Squared Errors = $cost") + // Make predictions + val predictions = model.transform(dataset) + + // Evaluate clustering by computing Silhouette score + val evaluator = new ClusteringEvaluator() + + val silhouette = evaluator.evaluate(predictions) + println(s"Silhouette with squared euclidean distance = $silhouette") // Shows the result. println("Cluster Centers: ") --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org