sunchao commented on code in PR #36067:
URL: https://github.com/apache/spark/pull/36067#discussion_r847632544
##########
sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala:
##########
@@ -480,6 +480,14 @@ case class AlterTableAddPartitionCommand(
if (addedSize > 0) {
val newStats = CatalogStatistics(sizeInBytes =
table.stats.get.sizeInBytes + addedSize)
catalog.alterTableStats(table.identifier, Some(newStats))
+
+ partitionSpecsAndLocs.foreach { case (partition, _) =>
+ val partitionSpec = partition.mapValues {
Review Comment:
I think we can just do `val partitionSpec = partition.mapValues(Some(_))`
here since it'll be handled later.
##########
sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala:
##########
@@ -53,7 +53,11 @@ class PathFilterIgnoreNonData(stagingDir: String) extends
PathFilter with Serial
object CommandUtils extends Logging {
/** Change statistics after changing data by commands. */
- def updateTableStats(sparkSession: SparkSession, table: CatalogTable): Unit
= {
+ def updateTableStats(
+ sparkSession: SparkSession,
+ table: CatalogTable,
+ partitionSpec: Map[String, Option[String]] = Map.empty,
+ withAutoPartitionStats: Boolean = true): Unit = {
Review Comment:
maybe make this default to false so the existing calls
`updateTableStats(session, table)` can still maintain the existing behavior.
##########
sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala:
##########
@@ -62,6 +66,20 @@ object CommandUtils extends Logging {
if (isNewStats) {
val newStats = CatalogStatistics(sizeInBytes = newSize)
catalog.alterTableStats(table.identifier, Some(newStats))
+
+ if (!isDropPartition && table.partitionColumnNames.nonEmpty) {
Review Comment:
I think here at line 64 we've already calculated size for each partition so
it'd be nice if we can just re-use it.
Perhaps `CommandUtils.calculateTotalSize` can return a map from partition to
its size, so that we can use the result and filter based on the input
`partitionSpecs`.
##########
sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala:
##########
@@ -53,7 +53,11 @@ class PathFilterIgnoreNonData(stagingDir: String) extends
PathFilter with Serial
object CommandUtils extends Logging {
/** Change statistics after changing data by commands. */
- def updateTableStats(sparkSession: SparkSession, table: CatalogTable): Unit
= {
+ def updateTableStats(
+ sparkSession: SparkSession,
+ table: CatalogTable,
+ partitionSpecs: Iterable[Map[String, Option[String]]] = Iterable.empty,
+ isDropPartition: Boolean = false): Unit = {
Review Comment:
can we add some comments to this method and its parameters? as it has became
more complex than before now.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]