Repository: spark Updated Branches: refs/heads/master 4e9e6aee4 -> afc364146
[SPARK-22905][ML][FOLLOWUP] Fix GaussianMixtureModel save ## What changes were proposed in this pull request? Make sure the model data is stored in order: build the DataFrame from a single-partition RDD (`sc.makeRDD(dataArray, 1)`) instead of calling `repartition(1)` on it, since the shuffle introduced by `repartition` does not guarantee row order. WeichenXu123 ## How was this patch tested? Existing tests. Author: Zheng RuiFeng <[email protected]> Closes #20113 from zhengruifeng/gmm_save. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/afc36414 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/afc36414 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/afc36414 Branch: refs/heads/master Commit: afc36414601d3f1a1946ccf2c630f43b7b7246a8 Parents: 4e9e6ae Author: Zheng RuiFeng <[email protected]> Authored: Fri Dec 29 10:08:03 2017 -0800 Committer: Joseph K. Bradley <[email protected]> Committed: Fri Dec 29 10:08:03 2017 -0800 ---------------------------------------------------------------------- .../org/apache/spark/mllib/clustering/GaussianMixtureModel.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/afc36414/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala index afbe4f9..1933d54 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala @@ -154,7 +154,7 @@ object GaussianMixtureModel extends Loader[GaussianMixtureModel] { val dataArray = Array.tabulate(weights.length) { i => Data(weights(i), gaussians(i).mu, gaussians(i).sigma) } - spark.createDataFrame(dataArray).repartition(1).write.parquet(Loader.dataPath(path)) + 
spark.createDataFrame(sc.makeRDD(dataArray, 1)).write.parquet(Loader.dataPath(path)) } def load(sc: SparkContext, path: String): GaussianMixtureModel = { --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
