This is an automated email from the ASF dual-hosted git repository.
weichenxu123 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new a0c9ab63f3b [SPARK-45386][SQL]: Fix correctness issue with persist using StorageLevel.NONE on Dataset (#43188)
a0c9ab63f3b is described below
commit a0c9ab63f3bcf4c9bb56c407375ce1c8cc26fb02
Author: Emil Ejbyfeldt <[email protected]>
AuthorDate: Mon Oct 2 11:36:53 2023 +0200
[SPARK-45386][SQL]: Fix correctness issue with persist using StorageLevel.NONE on Dataset (#43188)
* SPARK-45386: Fix correctness issue with StorageLevel.NONE
* Move to CacheManager
* Add comment
---
.../main/scala/org/apache/spark/sql/execution/CacheManager.scala | 4 +++-
sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala | 6 ++++++
2 files changed, 9 insertions(+), 1 deletion(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala
index 064819275e0..e906c74f8a5 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala
@@ -113,7 +113,9 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper {
planToCache: LogicalPlan,
tableName: Option[String],
storageLevel: StorageLevel): Unit = {
- if (lookupCachedData(planToCache).nonEmpty) {
+ if (storageLevel == StorageLevel.NONE) {
+ // Do nothing for StorageLevel.NONE since it will not actually cache any data.
+ } else if (lookupCachedData(planToCache).nonEmpty) {
logWarning("Asked to cache already cached data.")
} else {
val sessionWithConfigsOff = getOrCloneSessionWithConfigsOff(spark)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
index 04e619fa908..8fb25e120f5 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
@@ -47,6 +47,7 @@ import org.apache.spark.sql.functions._
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SharedSparkSession
import org.apache.spark.sql.types._
+import org.apache.spark.storage.StorageLevel
case class TestDataPoint(x: Int, y: Double, s: String, t: TestDataPoint2)
case class TestDataPoint2(x: Int, s: String)
@@ -2604,6 +2605,11 @@ class DatasetSuite extends QueryTest
parameters = Map("cls" -> classOf[Array[Int]].getName))
}
}
+
+ test("SPARK-45386: persist with StorageLevel.NONE should give correct count") {
+ val ds = Seq(1, 2).toDS().persist(StorageLevel.NONE)
+ assert(ds.count() == 2)
+ }
}
class DatasetLargeResultCollectingSuite extends QueryTest
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]