dongjoon-hyun commented on a change in pull request #27185: [SPARK-30494][SQL]
Fix the leak of cached data when replace an existing temp view
URL: https://github.com/apache/spark/pull/27185#discussion_r396217365
##########
File path: sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala
##########
@@ -1122,4 +1122,47 @@ class CachedTableSuite extends QueryTest with
SQLTestUtils
assert(!spark.catalog.isCached("t1"))
}
}
+
+ test("SPARK-30494 avoid duplicated cached RDD when replace an existing
view") {
+ withTempView("tempView") {
+ spark.catalog.clearCache()
+ sql("create or replace temporary view tempView as select 1")
+ sql("cache table tempView")
+ assert(spark.sharedState.cacheManager.lookupCachedData(sql("select
1")).isDefined)
+ sql("create or replace temporary view tempView as select 1, 2")
+ assert(spark.sharedState.cacheManager.lookupCachedData(sql("select
1")).isEmpty)
+ sql("cache table tempView")
+ assert(spark.sharedState.cacheManager.lookupCachedData(sql("select 1,
2")).isDefined)
+ assert(spark.sharedState.cacheManager.lookupCachedData(sql("select
1")).isEmpty)
+ }
+
+ withGlobalTempView("tempGlobalTempView") {
+ spark.catalog.clearCache()
+ sql("create or replace global temporary view tempGlobalTempView as
select 1")
+ sql("cache table global_temp.tempGlobalTempView")
+ assert(spark.sharedState.cacheManager.lookupCachedData(sql("select
1")).isDefined)
+ sql("create or replace global temporary view tempGlobalTempView as
select 1, 2")
+ assert(spark.sharedState.cacheManager.lookupCachedData(sql("select
1")).isEmpty)
+ sql("cache table global_temp.tempGlobalTempView")
+ assert(spark.sharedState.cacheManager.lookupCachedData(sql("select 1,
2")).isDefined)
+ assert(spark.sharedState.cacheManager.lookupCachedData(sql("select
1")).isEmpty)
+ }
+
+ withView("view1") {
+ spark.catalog.clearCache()
+ sql("create or replace view view1 as select 1")
+ sql("cache table view1")
+ sql("create or replace view view1 as select 1, 2")
+ sql("cache table view1")
+ // the cached plan of persisted view likes below,
+ // so we cannot use the same assertion of temp view.
+ // SubqueryAlias
+ // |
+ // + View
+ // |
+ // + Project[1 AS 1]
+ spark.sharedState.cacheManager.uncacheQuery(spark.table("view1"),
cascade = false)
+ assert(spark.sharedState.cacheManager.isEmpty)
Review comment:
Then, please remove this misleading test case between lines 1149 and 1165.
> no cached data leak for persisted view
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]