codope commented on code in PR #7847:
URL: https://github.com/apache/hudi/pull/7847#discussion_r1116550390
##########
hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/HoodieCatalogUtils.scala:
##########
@@ -17,8 +17,76 @@
package org.apache.spark.sql
+import org.apache.spark.sql.catalyst.catalog.CatalogTableType
+import org.apache.spark.sql.catalyst.{QualifiedTableName, TableIdentifier}
+
/**
* NOTE: Since support for [[TableCatalog]] was only added in Spark 3, this
trait
* is going to be an empty one simply serving as a placeholder (for
compatibility w/ Spark 2)
*/
trait HoodieCatalogUtils {}
+
+object HoodieCatalogUtils {
+
+ /**
+ * Please check scala-doc for other overloaded [[refreshTable()]] operation
+ */
+ def refreshTable(spark: SparkSession, qualifiedTableName: String): Unit = {
+ val tableId =
spark.sessionState.sqlParser.parseTableIdentifier(qualifiedTableName)
+ refreshTable(spark, tableId)
+ }
+
+ /**
+ * Refreshes metadata and flushes cached data (resolved [[LogicalPlan]]
representation,
+ * already loaded [[InMemoryRelation]]) for the table identified by
[[tableId]].
+ *
+ * This method is usually invoked at the ond of the write operation to make
sure cached
Review Comment:
```suggestion
* This method is usually invoked at the end of the write operation to
make sure cached
```
##########
hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/HoodieCatalogUtils.scala:
##########
@@ -17,8 +17,76 @@
package org.apache.spark.sql
+import org.apache.spark.sql.catalyst.catalog.CatalogTableType
+import org.apache.spark.sql.catalyst.{QualifiedTableName, TableIdentifier}
+
/**
* NOTE: Since support for [[TableCatalog]] was only added in Spark 3, this
trait
* is going to be an empty one simply serving as a placeholder (for
compatibility w/ Spark 2)
*/
trait HoodieCatalogUtils {}
+
+object HoodieCatalogUtils {
+
+ /**
+ * Please check scala-doc for other overloaded [[refreshTable()]] operation
+ */
+ def refreshTable(spark: SparkSession, qualifiedTableName: String): Unit = {
+ val tableId =
spark.sessionState.sqlParser.parseTableIdentifier(qualifiedTableName)
+ refreshTable(spark, tableId)
+ }
+
+ /**
+ * Refreshes metadata and flushes cached data (resolved [[LogicalPlan]]
representation,
+ * already loaded [[InMemoryRelation]]) for the table identified by
[[tableId]].
+ *
+ * This method is usually invoked at the ond of the write operation to make
sure cached
+ * data/metadata are synchronized with the state on storage.
+ *
+ * NOTE: PLEASE READ CAREFULLY BEFORE CHANGING
+ * This is borrowed from Spark 3.1.3 and modified to satisfy Hudi
needs:
Review Comment:
I am wondering what prompted Spark to change this behavior in version 3.2.
IMO, what Spark 3.1.3 was doing is more reasonable. What do we lose by not
sticking to the 3.2 behavior?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@hudi.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org