This is an automated email from the ASF dual-hosted git repository.
weichenxu123 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new a0c9ab63f3b [SPARK-45386][SQL]: Fix correctness issue with persist using StorageLevel.NONE on Dataset (#43188)
a0c9ab63f3b is described below
commit a0c9ab63f3bcf4c9bb56c407375ce1c8cc26fb02
Author: Emil Ejbyfeldt <[email protected]>
AuthorDate: Mon Oct 2 11:36:53 2023 +0200
[SPARK-45386][SQL]: Fix correctness issue with persist using StorageLevel.NONE on Dataset (#43188)
* SPARK-45386: Fix correctness issue with StorageLevel.NONE
* Move to CacheManager
* Add comment
---
.../main/scala/org/apache/spark/sql/execution/CacheManager.scala | 4 +++-
sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala | 6 ++++++
2 files changed, 9 insertions(+), 1 deletion(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala
index 064819275e0..e906c74f8a5 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala
@@ -113,7 +113,9 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper {
planToCache: LogicalPlan,
tableName: Option[String],
storageLevel: StorageLevel): Unit = {
- if (lookupCachedData(planToCache).nonEmpty) {
+ if (storageLevel == StorageLevel.NONE) {
+ // Do nothing for StorageLevel.NONE since it will not actually cache any data.
+ } else if (lookupCachedData(planToCache).nonEmpty) {
logWarning("Asked to cache already cached data.")
} else {
val sessionWithConfigsOff = getOrCloneSessionWithConfigsOff(spark)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
index 04e619fa908..8fb25e120f5 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
@@ -47,6 +47,7 @@ import org.apache.spark.sql.functions._
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SharedSparkSession
import org.apache.spark.sql.types._
+import org.apache.spark.storage.StorageLevel
case class TestDataPoint(x: Int, y: Double, s: String, t: TestDataPoint2)
case class TestDataPoint2(x: Int, s: String)
@@ -2604,6 +2605,11 @@ class DatasetSuite extends QueryTest
parameters = Map("cls" -> classOf[Array[Int]].getName))
}
}
+
+ test("SPARK-45386: persist with StorageLevel.NONE should give correct count") {
+ val ds = Seq(1, 2).toDS().persist(StorageLevel.NONE)
+ assert(ds.count() == 2)
+ }
}
class DatasetLargeResultCollectingSuite extends QueryTest
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]