cloud-fan commented on code in PR #52599:
URL: https://github.com/apache/spark/pull/52599#discussion_r2446657969
##########
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala:
##########
@@ -59,23 +59,32 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat
   import DataSourceV2Implicits._
   import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
+  private def cacheManager = session.sharedState.cacheManager
+
   private def hadoopConf = session.sessionState.newHadoopConf()
-  private def refreshCache(r: DataSourceV2Relation)(): Unit = {
-    session.sharedState.cacheManager.recacheByPlan(session, r)
+  // recaches all cache entries without time travel for the given table
+  // after a write operation that moves the state of the table forward (e.g. append, overwrite)
+  private def refreshCache(r: DataSourceV2Relation)(): Unit = r match {
+    case ExtractV2CatalogAndIdentifier(catalog, ident) =>
+      val nameParts = ident.toQualifiedNameParts(catalog)
+      cacheManager.recacheTableOrView(session, nameParts, includeTimeTravel = false)
+    case _ =>
+      cacheManager.recacheByPlan(session, r)
   }
+  // recaches all cache entries (including time travel) for the given table
Review Comment:
I can understand this for `REFRESH TABLE`, where we need to fully refresh
all the caches of the given table in case of dramatic table changes. But other
DDL commands just move the state of the table forward, the same as DML
commands, so why do we treat them differently?
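
For example, a cached time-travel read pins a past snapshot, so a
forward-moving change does not invalidate it (a rough spark-shell sketch;
the table `cat.db.t` and the version number are hypothetical):

```scala
// Cache one time-travel read and one read of the current table state.
spark.sql("CACHE TABLE t_v1 AS SELECT * FROM cat.db.t VERSION AS OF 1")
spark.sql("CACHE TABLE t_latest AS SELECT * FROM cat.db.t")

// Both a DML write and a schema-evolving DDL only move the table forward.
spark.sql("INSERT INTO cat.db.t VALUES (42)")
spark.sql("ALTER TABLE cat.db.t ADD COLUMN note STRING")

// `t_latest` must be recached, but `t_v1` still reflects version 1 and
// stays valid, so `includeTimeTravel = false` would seem to fit here too.
```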