LantaoJin commented on a change in pull request #27185: [SPARK-30494][SQL]
Avoid duplicated cached RDD when replace an existing view
URL: https://github.com/apache/spark/pull/27185#discussion_r396061024
##########
File path: sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala
##########
@@ -1122,4 +1122,47 @@ class CachedTableSuite extends QueryTest with
SQLTestUtils
assert(!spark.catalog.isCached("t1"))
}
}
+
+ test("SPARK-30494 avoid duplicated cached RDD when replace an existing
view") {
+ withTempView("tempView") {
+ spark.catalog.clearCache()
+ sql("create or replace temporary view tempView as select 1")
+ sql("cache table tempView")
+ assert(spark.sharedState.cacheManager.lookupCachedData(sql("select
1")).isDefined)
+ sql("create or replace temporary view tempView as select 1, 2")
+ assert(spark.sharedState.cacheManager.lookupCachedData(sql("select
1")).isEmpty)
+ sql("cache table tempView")
+ assert(spark.sharedState.cacheManager.lookupCachedData(sql("select 1,
2")).isDefined)
+ assert(spark.sharedState.cacheManager.lookupCachedData(sql("select
1")).isEmpty)
+ }
+
+ withGlobalTempView("tempGlobalTempView") {
+ spark.catalog.clearCache()
+ sql("create or replace global temporary view tempGlobalTempView as
select 1")
+ sql("cache table global_temp.tempGlobalTempView")
+ assert(spark.sharedState.cacheManager.lookupCachedData(sql("select
1")).isDefined)
+ sql("create or replace global temporary view tempGlobalTempView as
select 1, 2")
+ assert(spark.sharedState.cacheManager.lookupCachedData(sql("select
1")).isEmpty)
+ sql("cache table global_temp.tempGlobalTempView")
+ assert(spark.sharedState.cacheManager.lookupCachedData(sql("select 1,
2")).isDefined)
+ assert(spark.sharedState.cacheManager.lookupCachedData(sql("select
1")).isEmpty)
+ }
+
+ withView("view1") {
+ spark.catalog.clearCache()
+ sql("create or replace view view1 as select 1")
+ sql("cache table view1")
+ sql("create or replace view view1 as select 1, 2")
+ sql("cache table view1")
+ // the cached plan of persisted view likes below,
+ // so we cannot use the same assertion of temp view.
+ // SubqueryAlias
+ // |
+ // + View
+ // |
+ // + Project[1 AS 1]
+ spark.sharedState.cacheManager.uncacheQuery(spark.table("view1"),
cascade = false)
+ assert(spark.sharedState.cacheManager.isEmpty)
Review comment:
Lines 1164 and 1165 are there to make sure our patch is working, i.e. that no
cached data is leaked for a persisted view. Without this patch, the assertion
on line 1165 fails. So I don't think it's an invalid test.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]