Github user wzhfy commented on a diff in the pull request:
https://github.com/apache/spark/pull/20430#discussion_r165349231
--- Diff:
sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala
---
@@ -34,16 +34,12 @@ object CommandUtils extends Logging {
/** Change statistics after changing data by commands. */
def updateTableStats(sparkSession: SparkSession, table: CatalogTable):
Unit = {
- if (table.stats.nonEmpty) {
+ if (sparkSession.sessionState.conf.autoSizeUpdateEnabled) {
val catalog = sparkSession.sessionState.catalog
- if (sparkSession.sessionState.conf.autoSizeUpdateEnabled) {
- val newTable = catalog.getTableMetadata(table.identifier)
- val newSize =
CommandUtils.calculateTotalSize(sparkSession.sessionState, newTable)
- val newStats = CatalogStatistics(sizeInBytes = newSize)
- catalog.alterTableStats(table.identifier, Some(newStats))
- } else {
- catalog.alterTableStats(table.identifier, None)
--- End diff --
@felixcheung If a table's data has changed while auto size update is
disabled, the existing stats become inaccurate, so we should remove them
rather than keep stale values.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org