Repository: spark Updated Branches: refs/heads/master bbc2ffc8a -> bcf7121ed
[TRIVIAL][ML] GMM unpersist RDD after training ## What changes were proposed in this pull request? unpersist `instances` after training ## How was this patch tested? existing tests Author: 郑瑞峰 <zhengruifeng@ZBMAC-C02VX5XWH.local> Closes #21562 from zhengruifeng/gmm_unpersist. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/bcf7121e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/bcf7121e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/bcf7121e Branch: refs/heads/master Commit: bcf7121ed2283d88424863ac1d35393870eaae6b Parents: bbc2ffc Author: 郑瑞峰 <zhengruifeng@ZBMAC-C02VX5XWH.local> Authored: Sun Jul 15 20:14:17 2018 -0700 Committer: Felix Cheung <felixche...@apache.org> Committed: Sun Jul 15 20:14:17 2018 -0700 ---------------------------------------------------------------------- .../scala/org/apache/spark/ml/clustering/GaussianMixture.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/bcf7121e/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala index dae64ba..f0707b3 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala @@ -341,7 +341,7 @@ class GaussianMixture @Since("2.0.0") ( val sc = dataset.sparkSession.sparkContext val numClusters = $(k) - val instances: RDD[Vector] = dataset + val instances = dataset .select(DatasetUtils.columnToVector(dataset, getFeaturesCol)).rdd.map { case Row(features: Vector) => features }.cache() @@ -416,6 +416,7 @@ class GaussianMixture @Since("2.0.0") 
( iter += 1 } + instances.unpersist(false) val gaussianDists = gaussians.map { case (mean, covVec) => val cov = GaussianMixture.unpackUpperTriangularMatrix(numFeatures, covVec.values) new MultivariateGaussian(mean, cov) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org