cloud-fan commented on code in PR #52599:
URL: https://github.com/apache/spark/pull/52599#discussion_r2446657969
##########
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala:
##########
@@ -59,23 +59,32 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat
import DataSourceV2Implicits._
import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
+ private def cacheManager = session.sharedState.cacheManager
+
private def hadoopConf = session.sessionState.newHadoopConf()
- private def refreshCache(r: DataSourceV2Relation)(): Unit = {
- session.sharedState.cacheManager.recacheByPlan(session, r)
+ // recaches all cache entries without time travel for the given table
+ // after a write operation that moves the state of the table forward (e.g. append, overwrite)
+ private def refreshCache(r: DataSourceV2Relation)(): Unit = r match {
+ case ExtractV2CatalogAndIdentifier(catalog, ident) =>
+ val nameParts = ident.toQualifiedNameParts(catalog)
+ cacheManager.recacheTableOrView(session, nameParts, includeTimeTravel = false)
+ case _ =>
+ cacheManager.recacheByPlan(session, r)
}
+ // recaches all cache entries (including time travel) for the given table
Review Comment:
I can understand this for `REFRESH TABLE` and `REPLACE TABLE`, where we need
to fully refresh all caches of the given table in case of dramatic table
changes. But other DDL commands just move the state of the table forward, the
same as DML commands, so why do we treat them differently?
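
To make the question concrete, here is a rough sketch (not the actual PR code)
of how both cases could go through a single code path, reusing the names from
this diff; the `dramaticChange` flag is purely illustrative:

```scala
// Illustrative sketch only: one recache path for all commands, where the
// caller states whether the table may have changed dramatically.
// REFRESH TABLE / REPLACE TABLE would pass dramaticChange = true, while DML
// and forward-moving DDL would pass false and skip time-travel entries.
private def recacheAfterCommand(r: DataSourceV2Relation, dramaticChange: Boolean): Unit =
  r match {
    case ExtractV2CatalogAndIdentifier(catalog, ident) =>
      val nameParts = ident.toQualifiedNameParts(catalog)
      cacheManager.recacheTableOrView(session, nameParts,
        includeTimeTravel = dramaticChange)
    case _ =>
      // no catalog/identifier available; fall back to plan-based recaching
      cacheManager.recacheByPlan(session, r)
  }
```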