Repository: spark Updated Branches: refs/heads/branch-2.2 63098dc31 -> b606dc177
[SPARK-18608][ML] Fix double caching ## What changes were proposed in this pull request? `df.rdd.getStorageLevel` => `df.storageLevel` using cmd `find . -name '*.scala' | xargs -i bash -c 'egrep -in "\.rdd\.getStorageLevel" {} && echo {}'` to make sure all algs involved in this issue are fixed. Previous discussion in other PRs: https://github.com/apache/spark/pull/19107, https://github.com/apache/spark/pull/17014 ## How was this patch tested? existing tests Author: Zheng RuiFeng <ruife...@foxmail.com> Closes #19197 from zhengruifeng/double_caching. (cherry picked from commit c5f9b89dda40ffaa4622a7ba2b3d0605dbe815c0) Signed-off-by: Joseph K. Bradley <jos...@databricks.com> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b606dc17 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b606dc17 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b606dc17 Branch: refs/heads/branch-2.2 Commit: b606dc177e177bdbf99e72638eb8baec12e9fb53 Parents: 63098dc Author: Zheng RuiFeng <ruife...@foxmail.com> Authored: Tue Sep 12 11:37:05 2017 -0700 Committer: Joseph K. Bradley <jos...@databricks.com> Committed: Tue Sep 12 11:37:22 2017 -0700 ---------------------------------------------------------------------- .../org/apache/spark/ml/classification/LogisticRegression.scala | 2 +- .../scala/org/apache/spark/ml/classification/OneVsRest.scala | 4 ++-- mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala | 2 +- .../org/apache/spark/ml/regression/AFTSurvivalRegression.scala | 2 +- .../org/apache/spark/ml/regression/IsotonicRegression.scala | 2 +- .../scala/org/apache/spark/ml/regression/LinearRegression.scala | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/b606dc17/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 1de2373..e7f99fc 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -483,7 +483,7 @@ class LogisticRegression @Since("1.2.0") ( } override protected[spark] def train(dataset: Dataset[_]): LogisticRegressionModel = { - val handlePersistence = dataset.rdd.getStorageLevel == StorageLevel.NONE + val handlePersistence = dataset.storageLevel == StorageLevel.NONE train(dataset, handlePersistence) } http://git-wip-us.apache.org/repos/asf/spark/blob/b606dc17/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala index 05b8c3a..f3aff4c 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala @@ -164,7 +164,7 @@ final class OneVsRestModel private[ml] ( val newDataset = dataset.withColumn(accColName, initUDF()) // persist if underlying dataset is not persistent. - val handlePersistence = dataset.rdd.getStorageLevel == StorageLevel.NONE + val handlePersistence = dataset.storageLevel == StorageLevel.NONE if (handlePersistence) { newDataset.persist(StorageLevel.MEMORY_AND_DISK) } @@ -347,7 +347,7 @@ final class OneVsRest @Since("1.4.0") ( } // persist if underlying dataset is not persistent. - val handlePersistence = dataset.rdd.getStorageLevel == StorageLevel.NONE + val handlePersistence = dataset.storageLevel == StorageLevel.NONE if (handlePersistence) { multiclassLabeled.persist(StorageLevel.MEMORY_AND_DISK) } http://git-wip-us.apache.org/repos/asf/spark/blob/b606dc17/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala index e02b532..f2af7fe 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala @@ -304,7 +304,7 @@ class KMeans @Since("1.5.0") ( override def fit(dataset: Dataset[_]): KMeansModel = { transformSchema(dataset.schema, logging = true) - val handlePersistence = dataset.rdd.getStorageLevel == StorageLevel.NONE + val handlePersistence = dataset.storageLevel == StorageLevel.NONE val instances: RDD[OldVector] = dataset.select(col($(featuresCol))).rdd.map { case Row(point: Vector) => OldVectors.fromML(point) } http://git-wip-us.apache.org/repos/asf/spark/blob/b606dc17/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala index 16821f3..4b46c38 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala @@ -213,7 +213,7 @@ class AFTSurvivalRegression @Since("1.6.0") (@Since("1.6.0") override val uid: S override def fit(dataset: Dataset[_]): AFTSurvivalRegressionModel = { transformSchema(dataset.schema, logging = true) val instances = extractAFTPoints(dataset) - val handlePersistence = dataset.rdd.getStorageLevel == StorageLevel.NONE + val handlePersistence = dataset.storageLevel == StorageLevel.NONE if (handlePersistence) instances.persist(StorageLevel.MEMORY_AND_DISK) val featuresSummarizer = { http://git-wip-us.apache.org/repos/asf/spark/blob/b606dc17/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala index 529f66e..8faab52 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala @@ -165,7 +165,7 @@ class IsotonicRegression @Since("1.5.0") (@Since("1.5.0") override val uid: Stri transformSchema(dataset.schema, logging = true) // Extract columns from data. If dataset is persisted, do not persist oldDataset. val instances = extractWeightedLabeledPoints(dataset) - val handlePersistence = dataset.rdd.getStorageLevel == StorageLevel.NONE + val handlePersistence = dataset.storageLevel == StorageLevel.NONE if (handlePersistence) instances.persist(StorageLevel.MEMORY_AND_DISK) val instr = Instrumentation.create(this, dataset) http://git-wip-us.apache.org/repos/asf/spark/blob/b606dc17/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala index eaad549..18fcedf 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala @@ -236,7 +236,7 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String return lrModel } - val handlePersistence = dataset.rdd.getStorageLevel == StorageLevel.NONE + val handlePersistence = dataset.storageLevel == StorageLevel.NONE if (handlePersistence) instances.persist(StorageLevel.MEMORY_AND_DISK) val (featuresSummarizer, ySummarizer) = { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org